bitkeeper revision 1.1473.1.1 (428df901D5uzzXaFBp8z6tkbP0gV0w)
authorakw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
Fri, 20 May 2005 14:49:37 +0000 (14:49 +0000)
committerakw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
Fri, 20 May 2005 14:49:37 +0000 (14:49 +0000)
- Finer-grained asynchronous dispatch in parallax daemon.
- Cleanups and cull of older code.
- Fixes to handle changes in block protocol.

Signed-off-by: andrew.warfield@cl.cam.ac.uk
33 files changed:
.rootkeys
tools/blktap/Makefile
tools/blktap/blkaio.c [deleted file]
tools/blktap/blkaiolib.c [deleted file]
tools/blktap/blkaiolib.h [deleted file]
tools/blktap/blkcow.c [deleted file]
tools/blktap/blkcowgnbd.c [deleted file]
tools/blktap/blkcowimg.c [deleted file]
tools/blktap/blkcowlib.c [deleted file]
tools/blktap/blkcowlib.h [deleted file]
tools/blktap/blkdump.c
tools/blktap/blkgnbd.c [deleted file]
tools/blktap/blkgnbdlib.c [deleted file]
tools/blktap/blkgnbdlib.h [deleted file]
tools/blktap/blkimg.c [deleted file]
tools/blktap/blkimglib.c [deleted file]
tools/blktap/blkimglib.h [deleted file]
tools/blktap/block-async.c [new file with mode: 0755]
tools/blktap/block-async.h [new file with mode: 0755]
tools/blktap/blockstore-tls.c [deleted file]
tools/blktap/blockstore.c
tools/blktap/libgnbd/Makefile [deleted file]
tools/blktap/libgnbd/gnbdtest.c [deleted file]
tools/blktap/libgnbd/libgnbd.c [deleted file]
tools/blktap/libgnbd/libgnbd.h [deleted file]
tools/blktap/parallax-threaded.c
tools/blktap/parallax.c
tools/blktap/radix.c
tools/blktap/radix.h
tools/blktap/requests-async.c [new file with mode: 0755]
tools/blktap/requests-async.h [new file with mode: 0755]
tools/blktap/vdi.c
tools/blktap/vdi.h

index a4eeac0c72d093d85b377cdde7932bc5a5bfe392..67dc5d004ba0be2d80b717cff0b2cc8216cfd7d4 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
 4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
 42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX
-4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
-4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
-4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
-4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c
-4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c
-4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c
-4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c
-4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h
 4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
-4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c
-4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c
-4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h
-4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c
-42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c
-42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h
 42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
 42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
 42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
-423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c
+428df8fdkg84W8yveE50EbkbTUZgjQ tools/blktap/block-async.c
+428df8feTrgGFZEBMA_dYijy9DNs1g tools/blktap/block-async.h
 42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
 42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
 42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
 42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c
-42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
-42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
-42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
-42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
 423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c
 423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h
 42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
 42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
 42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
+428df8fe5RYONloDWVMkM-CfHfB1vA tools/blktap/requests-async.c
+428df8feWeKJ-9HJb5_rFqdm_xqErg tools/blktap/requests-async.h
 42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c
 42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h
 42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
index 9d794301c3ad830c3d1586e2c2b325541215954c..26187de77963757f9fd98afc74941d48ee77fc4b 100644 (file)
@@ -22,12 +22,12 @@ PLX_SRCS :=
 PLX_SRCS += vdi.c 
 PLX_SRCS += radix.c 
 PLX_SRCS += snaplog.c
+PLX_SRCS += blockstore.c 
+PLX_SRCS += block-async.c
 PLXT_SRCS := $(PLX_SRCS)
-#PLXT_SRCS += blockstore-tls.c
-PLXT_SRCS += blockstore.c
 PLXT_SRCS += parallax-threaded.c
-PLX_SRCS += blockstore.c 
 VDI_SRCS := $(PLX_SRCS)
+PLX_SRCS += requests-async.c
 PLX_SRCS += parallax.c
 
 VDI_TOOLS :=
@@ -55,10 +55,11 @@ CFLAGS   += -Wp,-MD,.$(@F).d
 DEPS     = .*.d
 
 OBJS     = $(patsubst %.c,%.o,$(SRCS))
+IBINS    = blkdump parallax $(VDI_TOOLS)
 
 LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
 
-all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax parallax-threaded blockstored
+all: mk-symlinks blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored
        $(MAKE) $(LIB)
 
 LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
@@ -77,10 +78,10 @@ install: all
        $(INSTALL_DIR) -p $(DESTDIR)/usr/include
        $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       $(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
+       $(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
 
 clean:
-       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax
+       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax parallax-threaded
 
 rpm: all
        rm -rf staging
@@ -101,32 +102,11 @@ libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
 blkdump: $(LIB)
        $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
 
-blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c 
-       $(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c
-
-blkcow: $(LIB) blkcow.c blkcowlib.c
-       $(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c
-
-blkimg: $(LIB) blkimg.c blkimglib.c
-       $(CC) $(CFLAGS) -o blkimg  -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c
-
-blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c
-       $(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a
-
-blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c
-       $(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a
-
-blkaio: $(LIB) blkaio.c blkaiolib.c
-       $(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread
-
 parallax: $(LIB) $(PLX_SRCS)
-       $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) libgnbd/libgnbd.a
+       $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) 
 
 parallax-threaded: $(LIB) $(PLXT_SRCS)
-       $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a
-
-vdi_test: $(LIB) $(VDI_SRCS)
-       $(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE -lpthread $(VDI_SRCS)
+       $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS)
 
 vdi_list: $(LIB) vdi_list.c $(VDI_SRCS)
        $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c -lpthread $(VDI_SRCS)
@@ -163,16 +143,3 @@ TAGS:
 
 -include $(DEPS)
 
-#Random testing targets.  To be removed eventually.
-
-rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
-       $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
-
-bb-tls: $(LIB) blockstore-benchmark.c
-       $(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread
-
-bb-trans: $(LIB) blockstore-benchmark.c
-       $(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread
-
-radix-test: $(LIB) radix.c blockstore.c
-       $(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
diff --git a/tools/blktap/blkaio.c b/tools/blktap/blkaio.c
deleted file mode 100644 (file)
index 2549571..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* blkaio.c
- *
- * libaio-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkaiolib.h"
-
-
-int main(int argc, char *argv[])
-{
-    aio_init();
-    
-    blktap_register_ctrl_hook("aio_control", aio_control);
-    blktap_register_request_hook("aio_request", aio_request);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkaiolib.c b/tools/blktap/blkaiolib.c
deleted file mode 100644 (file)
index 4538a9e..0000000
+++ /dev/null
@@ -1,489 +0,0 @@
-/* blkaiolib.c
- *
- * file/device image-backed block device -- using linux libaio.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- *
- * NOTE: This doesn't work.  Grrr.
- */
-
-#define _GNU_SOURCE
-#define __USE_LARGEFILE64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <db.h>       
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <errno.h>
-#include <libaio.h>
-#include <pthread.h>
-#include <time.h>
-#include "blktaplib.h"
-
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_DOMS              1024
-#define MAX_IMGNAME_LEN        255
-#define AMORFS_DEV           61440
-#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
-#define MAX_SEGMENTS_PER_REQ    11
-#define SECTOR_SHIFT             9
-#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
-                                                                                
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-           
-#if 1                                                                        
-#define ASSERT(_p) \
-    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif                                                                     
-
-char dbg_page[4096];
-
-typedef struct {
-    /* These need to turn into an array/rbtree for multi-disk support. */
-    int  fd;
-    u64  fsid;
-    char imgname[MAX_IMGNAME_LEN];
-    blkif_vdev_t   vdevice;
-} image_t;
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to 
- * get filled.  so count of 0 is an unused record.
- */
-typedef struct {
-    blkif_request_t  req;
-    int              count;
-} pending_req_t;
-
-static pending_req_t    pending_list[MAX_REQUESTS];
-image_t                *images[MAX_DOMS];
-
-static io_context_t  ctx;
-static struct iocb  *iocb_free[MAX_AIO_REQS];
-static int           iocb_free_count;
-
-/* ---[ Notification mecahnism ]--------------------------------------- */
-
-enum { 
-    READ   = 0,
-    WRITE  = 1
-};
-
-static int aio_notify[2];
-static volatile int aio_listening = 0;
-
-static struct io_event aio_events[MAX_AIO_REQS];
-static int             aio_event_count = 0;
-
-/* this is commented out in libaio.h for some reason. */
-extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
-
-static void *notifier_thread(void *arg)
-{
-    int ret; 
-    int msg = 0x00feeb00;
-    
-    printf("Notifier thread started.\n");
-    for (;;) {
-        //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) {
-        if ((aio_listening) && 
-           ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) {
-            aio_event_count = ret;
-            printf("[Notifying! (%d)]\n", aio_event_count);
-            aio_listening = 0;
-            write(aio_notify[WRITE], &msg, sizeof(msg));
-            fsync(aio_notify[WRITE]);
-        } else {
-            if (aio_listening)
-                printf("[io_queue_wait error! %d]\n", errno);
-            usleep(1000); /* Not ready to read. */
-        }
-    }
-}
-
-/* -------------------------------------------------------------------- */
-
-int aio_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    DB      *db;
-    int      ret;
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
-                ((blkif_be_create_t *)msg->msg)->domid,
-                ((blkif_be_create_t *)msg->msg)->blkif_handle);
-        domid = ((blkif_be_create_t *)msg->msg)->domid;
-        if (images[domid] != NULL) {
-            printf("attempt to connect from an existing dom!\n");
-            return 0;
-        }
-        
-        images[domid] = (image_t *)malloc(sizeof(image_t));
-        if (images[domid] == NULL) {
-            printf("error allocating image record.\n");
-            return 0;
-        }
-        
-        images[domid]->fd  = -1;
-        images[domid]->fsid = 0;
-        
-        printf("Image connected.\n");
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
-                ((blkif_be_destroy_t *)msg->msg)->domid,
-                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-        
-        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
-        if (images[domid] != NULL) {
-            if (images[domid]->fd != -1)
-                close( images[domid]->fd );
-            free( images[domid] );
-            images[domid] = NULL;
-        }
-        break;  
-    case CMSG_BLKIF_BE_VBD_GROW:
-    {
-        blkif_be_vbd_grow_t *grow;
-        
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
-                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
-        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
-        grow = (blkif_be_vbd_grow_t *)msg->msg;
-        domid = grow->domid;
-        if (images[domid] == NULL) {
-            printf("VBD_GROW on unconnected domain!\n");
-            return 0;
-        }
-        
-        if (grow->extent.device != AMORFS_DEV) {
-            printf("VBD_GROW on non-amorfs device!\n");
-            return 0;
-        }
-        
-        /* TODO: config support for arbitrary image files/modes. */
-        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
-        
-        images[domid]->fsid   = grow->extent.sector_start;
-        images[domid]->vdevice = grow->vdevice; 
-        images[domid]->fd = open(TMP_IMAGE_FILE_NAME, 
-                O_RDWR | O_DIRECT | O_LARGEFILE);
-        if (images[domid]->fd < 0) {
-            printf("Couldn't open image file! %d\n", errno);
-            return 0;
-        }
-        
-        printf("Image file opened. (%s)\n", images[domid]->imgname);
-        break;
-    }    
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-create_failed:
-    /* TODO: close the db ref. */
-    return 0;
-}    
-int aio_request(blkif_request_t *req)
-{
-    int fd;
-    u64 sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    
-    if ((images[dom] == NULL) || (images[dom]->fd == -1)) {
-        printf("Data request for unknown domain!!! %d\n", dom);
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = req->operation;
-        rsp->status = BLKIF_RSP_ERROR;
-        return BLKTAP_RESPOND;
-    }
-    
-    fd = images[dom]->fd;
-    
-    switch (req->operation) 
-    {
-    case BLKIF_OP_PROBE:
-    {
-        struct stat stat;
-        vdisk_t *img_info;
-        
-        
-        /* We expect one buffer only. */
-        if ( req->nr_segments != 1 )
-            goto err;
-                                                                                
-        /* Make sure the buffer is page-sized. */
-        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
-             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
-            goto err;
-
-        /* loop for multiple images would start here. */
-        
-        ret = fstat(fd, &stat);
-        if (ret != 0) {
-            printf("Couldn't stat image in PROBE!\n");
-            goto err;
-        }
-        
-        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-        img_info[0].device   = images[dom]->vdevice;
-        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
-        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
-        
-        if (img_info[0].capacity == 0)
-            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
-        
-        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
-                img_info[0].capacity);
-        
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_PROBE;
-        rsp->status = 1; /* number of disks */
-        
-        return  BLKTAP_RESPOND;
-    }    
-    case BLKIF_OP_WRITE:
-    {
-        unsigned long size;
-        struct iocb *io;
-        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
-        
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT));
-                        
-            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-            io = iocb_free[--iocb_free_count];
-            io_prep_pwrite(io, fd, spage, size, sector);
-            io->data = (void *)idx;
-            ioq[i] = io;
-        }
-        
-        ret = io_submit(ctx, req->nr_segments, ioq);
-        if (ret < 0)
-            printf("BADNESS: io_submit error! (%d)\n", errno);
-        
-        pending_list[idx].count = req->nr_segments;
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    case BLKIF_OP_READ:
-    {
-        unsigned long size;
-        struct iocb *io;
-        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
-        
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector  = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            
-            
-            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
-                    "pos: %15lu dpage: %p\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT), dpage);
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-            io = iocb_free[--iocb_free_count];
-            
-            io_prep_pread(io, fd, dpage, size, sector);
-            io->data = (void *)idx;
-            
-            ioq[i] = io;
-        }
-        
-        ret = io_submit(ctx, req->nr_segments, ioq);
-        if (ret < 0)
-            printf("BADNESS: io_submit error! (%d)\n", errno);
-        
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    }
-    
-    printf("Unknown block operation!\n");
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-
-int aio_pollhook(int fd)
-{
-    struct io_event *ep;
-    int n, ret, idx;
-    blkif_request_t *req;
-    blkif_response_t *rsp;
-    
-    DPRINTF("aio_hook(): \n");
-    
-    for (ep = aio_events; aio_event_count-- > 0; ep++) {
-        struct iocb *io = ep->obj;
-        idx = (int) ep->data;
-        
-        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
-            printf("gnbd returned a bad cookie (%u)!\n", idx);
-            break;
-        }
-        
-        if ((int)ep->res < 0) printf("aio request error! (%d,%d)\n", 
-            (int)ep->res, (int)ep->res2);
-        
-        pending_list[idx].count--;
-        iocb_free[iocb_free_count++] = io;
-        
-        if (pending_list[idx].count == 0) {
-            blkif_request_t tmp = pending_list[idx].req;
-            rsp = (blkif_response_t *)&pending_list[idx].req;
-            rsp->id = tmp.id;
-            rsp->operation = tmp.operation;
-            rsp->status = BLKIF_RSP_OKAY;
-            blktap_inject_response(rsp);
-        }
-    }
-    
-    printf("pollhook done!\n");
-    
-    read(aio_notify[READ], &idx, sizeof(idx));
-    aio_listening = 1;
-    
-    return 0;
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int aio_response(blkif_response_t *rsp)
-{   
-    return BLKTAP_PASS;
-}
-
-void aio_init(void)
-{
-    int i, rc;
-    pthread_t p;
-    
-    for (i = 0; i < MAX_DOMS; i++)
-        images[i] = NULL;
-    
-    for (i = 0; i < MAX_REQUESTS; i++)
-        pending_list[i].count = 0; 
-    
-    memset(&ctx, 0, sizeof(ctx));
-    rc = io_queue_init(MAX_AIO_REQS, &ctx);
-    if (rc != 0) {
-        printf("queue_init failed! (%d)\n", rc);
-        exit(0);
-    }
-    
-    for (i=0; i<MAX_AIO_REQS; i++) {
-        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
-            printf("error allocating iocb array\n");
-            exit(0);
-        }
-        iocb_free_count = i;
-    }
-    
-    rc = pipe(aio_notify);
-    if (rc != 0) {
-        printf("pipe failed! (%d)\n", errno);
-        exit(0);
-    }
-    
-    rc = pthread_create(&p, NULL, notifier_thread, NULL);
-    if (rc != 0) {
-        printf("pthread_create failed! (%d)\n", errno);
-        exit(0);
-    }
-    
-    aio_listening = 1;
-    
-    blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook);
-}
-
diff --git a/tools/blktap/blkaiolib.h b/tools/blktap/blkaiolib.h
deleted file mode 100644 (file)
index 7e26dae..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* blkaiolib.h
- *
- * aio image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int aio_control(control_msg_t *msg);
-int aio_request(blkif_request_t *req);
-int aio_response(blkif_response_t *rsp); /* noop */
-void aio_init(void);
diff --git a/tools/blktap/blkcow.c b/tools/blktap/blkcow.c
deleted file mode 100644 (file)
index 82f9335..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* blkcow.c
- *
- * copy on write a block device.  in a really inefficient way.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * This uses whatever backend the tap is attached to as the read-only
- * underlay -- for the moment.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent,
- * the cow plugin uses this to identify a unique overlay.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-
-
-int main(int argc, char *argv[])
-{
-    cow_init();
-    
-    blktap_register_ctrl_hook("cow_control", cow_control);
-    blktap_register_request_hook("cow_request", cow_request);
-    blktap_register_response_hook("cow_response", cow_response);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkcowgnbd.c b/tools/blktap/blkcowgnbd.c
deleted file mode 100644 (file)
index 81f9bad..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* blkcowgnbd.c
- *
- * gnbd-backed cow.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-#include "blkgnbdlib.h"
-
-
-int main(int argc, char *argv[])
-{
-    cow_init();
-    gnbd_init();
-    
-    blktap_register_ctrl_hook("cow_control", cow_control);
-    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
-    blktap_register_request_hook("cow_request", cow_request);
-    blktap_register_request_hook("gnbd_request", gnbd_request);
-    blktap_register_response_hook("cow_response", cow_response);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkcowimg.c b/tools/blktap/blkcowimg.c
deleted file mode 100644 (file)
index 40aa1f8..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* blkcowimg.c
- *
- * file-backed cow.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    cow_init();
-    image_init();
-    
-    blktap_register_ctrl_hook("cow_control", cow_control);
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("cow_request", cow_request);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_register_response_hook("cow_response", cow_response);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkcowlib.c b/tools/blktap/blkcowlib.c
deleted file mode 100644 (file)
index 3518b4f..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/* blkcowlib.c
- *
- * copy on write a block device.  in a really inefficient way.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * This uses whatever backend the tap is attached to as the read-only
- * underlay -- for the moment.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent,
- * the cow plugin uses this to identify a unique overlay.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>
-#include "blktaplib.h"
-
-#define MAX_DOMS        1024
-#define MAX_DBNAME_LEN   255
-#define AMORFS_DEV     61440
-#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
-                                                                                
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-    
-/* Berkeley db has different params for open() after 4.1 */
-#ifndef DB_VERSION_MAJOR
-# define DB_VERSION_MAJOR 1
-#endif /* DB_VERSION_MAJOR */
-#ifndef DB_VERSION_MINOR
-# define DB_VERSION_MINOR 0
-#endif /* DB_VERSION_MINOR */
-
-typedef struct {
-    DB   *db;
-    u64  fsid;
-    char dbname[MAX_DBNAME_LEN];
-} cow_t;
-
-cow_t           *cows[MAX_DOMS];
-blkif_request_t *reread_list[MAX_REQUESTS];
-
-int cow_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    DB      *db;
-    int      ret;
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
-                ((blkif_be_create_t *)msg->msg)->domid,
-                ((blkif_be_create_t *)msg->msg)->blkif_handle);
-        domid = ((blkif_be_create_t *)msg->msg)->domid;
-        if (cows[domid] != NULL) {
-            printf("attempt to connect from an existing dom!\n");
-            return 0;
-        }
-        
-        cows[domid] = (cow_t *)malloc(sizeof(cow_t));
-        if (cows[domid] == NULL) {
-            printf("error allocating cow.\n");
-            return 0;
-        }
-        
-        cows[domid]->db   = NULL;
-        cows[domid]->fsid = 0;
-        
-        printf("COW connected.\n");
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
-                ((blkif_be_destroy_t *)msg->msg)->domid,
-                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-        
-        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
-        if (cows[domid] != NULL) {
-            if (cows[domid]->db != NULL)
-                cows[domid]->db->close(cows[domid]->db, 0);
-            free(cows[domid]);
-            cows[domid] = NULL;
-        }
-        break;  
-    case CMSG_BLKIF_BE_VBD_GROW:
-    {
-        blkif_be_vbd_grow_t *grow;
-        
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
-                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
-        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
-        grow = (blkif_be_vbd_grow_t *)msg->msg;
-        domid = grow->domid;
-        if (cows[domid] == NULL) {
-            printf("VBD_GROW on unconnected domain!\n");
-            return 0;
-        }
-        
-        if (grow->extent.device != AMORFS_DEV) {
-            printf("VBD_GROW on non-amorfs device!\n");
-            return 0;
-        }
-        
-        sprintf(&cows[domid]->dbname[0], "%020llu.db",
-                grow->extent.sector_start);
-        
-        cows[domid]->fsid = grow->extent.sector_start;
-            
-        if ((ret = db_create(&db, NULL, 0)) != 0) {
-            fprintf(stderr, "db_create: %s\n", db_strerror(ret));
-            return 0;
-        }
-        
-        
-#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1)
-
-        if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE, 
-                DB_CREATE, 0664)) != 0) {
-            
-#else /* DB_VERSION >= 4.1 */
-        
-        if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE, 
-                DB_CREATE, 0664)) != 0) {
-            
-#endif /* DB_VERSION < 4.1 */
-
-            db->err(db, ret, "%s", cows[domid]->dbname);
-            goto create_failed;
-        }
-        cows[domid]->db = db;
-        printf("Overlay db opened. (%s)\n", cows[domid]->dbname);
-        break;
-    }    
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-create_failed:
-    /* TODO: close the db ref. */
-    return 0;
-}    
-int cow_request(blkif_request_t *req)
-{
-    DB *db;
-    DBT key, data;
-    u64 sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    
-    if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
-        printf("Data request for unknown domain!!! %d\n", dom);
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = req->operation;
-        rsp->status = BLKIF_RSP_ERROR;
-        return BLKTAP_RESPOND;
-    }
-    
-    db = cows[dom]->db;
-    
-    switch (req->operation) 
-    {
-    case BLKIF_OP_PROBE:
-/* debug -- delete */
-idx = ID_TO_IDX(req->id);
-reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
-memcpy(reread_list[idx], req, sizeof(*req));
-        return  BLKTAP_PASS;
-        
-    case BLKIF_OP_WRITE:
-        for (i = 0; i < req->nr_segments; i++) {
-            memset(&key, 0, sizeof(key));
-           memset(&data, 0, sizeof(data));
-            
-            sector = req->sector_number + (8*i);
-            key.data = &sector;
-            key.size = sizeof(sector);
-            
-            spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            data.data = spage;
-            data.size = PAGE_SIZE;
-            
-            
-            DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << 9));
-            
-            if ((ret = db->put(db, NULL, &key, &data, 0)) == 0)
-                DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data));
-            else {
-                db->err(db, ret, "DB->put");
-                goto err;
-            }
-        }
-        
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_WRITE;
-        rsp->status = BLKIF_RSP_OKAY;
-        
-        return BLKTAP_RESPOND;
-
-    case BLKIF_OP_READ:
-        for (i = 0; i < req->nr_segments; i++) {
-            memset(&key, 0, sizeof(key));
-           memset(&data, 0, sizeof(data));
-            
-            sector = req->sector_number + (8*i);
-            key.data = &sector;
-            key.size = sizeof(sector);
-            
-            DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << 9));
-
-            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
-                DPRINTF("db: %llu: key retrieved (req).\n",
-                    *((u64 *)key.data));
-                
-                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-                spage = data.data;
-                memcpy(dpage, spage, PAGE_SIZE);
-
-            } else if (ret == DB_NOTFOUND) {
-                idx = ID_TO_IDX(req->id);
-                if (idx > MAX_REQUESTS) {
-                    printf("Bad index!\n");
-                    goto err;
-                }
-                if (reread_list[idx] != NULL) {
-                    printf("Dupe index!\n");
-                    goto err;
-                }
-                reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
-                memcpy(reread_list[idx], req, sizeof(*req));
-                return BLKTAP_PASS;
-            } else {
-                db->err(db, ret, "DB->get");
-                goto err;
-            }
-        }
-
-
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_READ;
-        rsp->status = BLKIF_RSP_OKAY;
-        return BLKTAP_RESPOND;
-    }
-    
-    printf("Unknow block operation!\n");
-    return BLKTAP_PASS;
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-int cow_response(blkif_response_t *rsp)
-{   
-    blkif_request_t *req;
-    int i, ret;
-    DB *db;
-    DBT key, data;
-    u64 sector;
-    char *spage, *dpage;
-    int idx = ID_TO_IDX(rsp->id);
-    domid_t dom;
-    
-    /* don't touch erroring responses. */
-    if (rsp->status == BLKIF_RSP_ERROR)
-        return BLKTAP_PASS;
-    
-    if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL))
-    {
-        req = reread_list[idx];
-        dom = ID_TO_DOM(req->id);
-
-        if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
-            printf("Response from unknown domain!!! Very badness! %d\n", dom);
-            return BLKTAP_PASS;
-        }
-    
-        db = cows[dom]->db;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            memset(&key, 0, sizeof(key));
-           memset(&data, 0, sizeof(data));
-            
-            sector = req->sector_number + (8*i);
-            key.data = &sector;
-            key.size = sizeof(sector);
-            
-            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
-                printf("db: %llu: key retrieved (rsp).\n",
-                    *((u64 *)key.data));
-                
-                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-                spage = data.data;
-                memcpy(dpage, spage, PAGE_SIZE);
-
-            } else if (ret == DB_NOTFOUND) {
-                continue; /* We read this from disk. */
-            } else {
-                db->err(db, ret, "DB->get");
-                goto err;
-            }
-        }
-        free(reread_list[idx]);
-        reread_list[idx] = NULL;
-    }
-    
-    if (rsp->operation == BLKIF_OP_PROBE) {
-        
-        vdisk_t *img_info;
-        
-        req = reread_list[idx];
-        img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-        for (i =0; i < rsp->status; i++) 
-            printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n", 
-                    i,
-                    img_info[0].device,
-                    img_info[0].capacity,
-                    img_info[0].info);
-        free(reread_list[idx]);
-        reread_list[idx] = NULL;
-    }
-    
-err:
-    return BLKTAP_PASS;
-}
-
-void cow_init(void)
-{
-    int i;
-    
-    for (i = 0; i < MAX_DOMS; i++)
-        cows[i] = NULL;
-    
-    for (i = 0; i < MAX_REQUESTS; i++)
-        reread_list[MAX_REQUESTS] = NULL;
-}
-
diff --git a/tools/blktap/blkcowlib.h b/tools/blktap/blkcowlib.h
deleted file mode 100644 (file)
index e6bd7a5..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* blkcowlib.h
- *
- * copy on write a block device.  in a really inefficient way.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * public interfaces to the CoW tap.
- *
- */
-int  cow_control  (control_msg_t *msg);
-int  cow_request  (blkif_request_t *req);
-int  cow_response (blkif_response_t *rsp);
-void cow_init     (void);
index d64a58981a99481cf6430682673f6342c6a6494d..0cf087ff0237c057ae14db4db55c5e49d897423b 100644 (file)
@@ -62,18 +62,6 @@ int control_print(control_msg_t *msg)
                 ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
                 ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
         break;
-    case CMSG_BLKIF_BE_VBD_GROW:
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
-                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
-        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
-        break;
     default:
         goto parse_error;
     }
diff --git a/tools/blktap/blkgnbd.c b/tools/blktap/blkgnbd.c
deleted file mode 100644 (file)
index 6a6bd67..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* blkgnbd.c
- *
- * gnbd-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkgnbdlib.h"
-
-
-int main(int argc, char *argv[])
-{
-    gnbd_init();
-    
-    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
-    blktap_register_request_hook("gnbd_request", gnbd_request);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkgnbdlib.c b/tools/blktap/blkgnbdlib.c
deleted file mode 100644 (file)
index 6eeb49c..0000000
+++ /dev/null
@@ -1,471 +0,0 @@
-/* blkgnbdlib.c
- *
- * gnbd image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>       
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/poll.h>
-#include "blktaplib.h"
-#include "libgnbd/libgnbd.h"
-
-#define GNBD_SERVER  "skirmish.cl.cam.ac.uk"
-#define GNBD_CLIENT  "pengi-0.xeno.cl.cam.ac.uk"
-#define GNBD_MOUNT   "fc2_akw27"
-#define GNBD_PORT    0x38e7
-
-#define MAX_DOMS        1024
-#define MAX_IMGNAME_LEN  255
-#define AMORFS_DEV     61440
-#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
-#define SECTOR_SHIFT       9
-                                                                                
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-        
-#if 1                                                                        
-#define ASSERT(_p) \
-    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define GH_DISCONNECTED 0
-#define GH_PROBEWAITING 1
-#define GH_CONNECTED    2
-
-typedef struct {
-    /* These need to turn into an array/rbtree for multi-disk support. */
-    struct gnbd_handle *gh;
-    int          gh_state;
-    int          probe_idx; /* This really needs cleaning up after hotos. */
-    int          fd;
-    u64          fsid;
-    char         gnbdname[MAX_IMGNAME_LEN];
-    blkif_vdev_t vdevice;
-} gnbd_t;
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to 
- * get filled.  so count of 0 is an unused record.
- */
-typedef struct {
-    blkif_request_t  req;
-    int              count;
-} pending_req_t;
-
-static gnbd_t          *gnbds[MAX_DOMS];
-static pending_req_t    pending_list[MAX_REQUESTS];
-static int              pending_count = 0; /* debugging */
-
-
-gnbd_t *get_gnbd_by_fd(int fd)
-{
-    /* this is a linear scan for the moment.  nees to be cleaned up for
-       multi-disk support. */
-    
-    int i;
-    
-    for (i=0; i< MAX_DOMS; i++) 
-        if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd))
-            return gnbds[i];
-    
-    return NULL;
-}
-
-int gnbd_pollhook(int fd);
-
-int gnbd_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    DB      *db;
-    int      ret;
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
-                ((blkif_be_create_t *)msg->msg)->domid,
-                ((blkif_be_create_t *)msg->msg)->blkif_handle);
-        domid = ((blkif_be_create_t *)msg->msg)->domid;
-        if (gnbds[domid] != NULL) {
-            printf("attempt to connect from an existing dom!\n");
-            return 0;
-        }
-        
-        gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t));
-        if (gnbds[domid] == NULL) {
-            printf("error allocating gnbd record.\n");
-            return 0;
-        }
-        
-        gnbds[domid]->gh  = NULL;
-        gnbds[domid]->fsid = 0;
-        
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
-                ((blkif_be_destroy_t *)msg->msg)->domid,
-                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-        
-        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
-        if (gnbds[domid] != NULL) {
-            if (gnbds[domid]->gh != NULL) {
-                blktap_detach_poll(gnbds[domid]->fd);
-                free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */;
-            }
-            free( gnbds[domid] );
-            gnbds[domid] = NULL;
-        }
-        break;  
-    case CMSG_BLKIF_BE_VBD_GROW:
-    {
-        blkif_be_vbd_grow_t *grow;
-        
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
-                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
-        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
-        grow = (blkif_be_vbd_grow_t *)msg->msg;
-        domid = grow->domid;
-        if (gnbds[domid] == NULL) {
-            printf("VBD_GROW on unconnected domain!\n");
-            return 0;
-        }
-        
-        if (grow->extent.device != AMORFS_DEV) {
-            printf("VBD_GROW on non-amorfs device!\n");
-            return 0;
-        }
-        
-        /* TODO: config support for arbitrary gnbd files/modes. */
-        sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT);
-        
-        gnbds[domid]->fsid   = grow->extent.sector_start;
-        gnbds[domid]->vdevice = grow->vdevice; 
-        gnbds[domid]->gh_state = GH_DISCONNECTED;
-        gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT, 
-            gnbds[domid]->gnbdname, GNBD_CLIENT);
-        if (gnbds[domid]->gh == NULL) { 
-            printf("Couldn't connect to gnbd mount!!\n");
-            return 0;
-        }
-        gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh);
-        blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook);
-        
-        printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname);
-        break;
-    }    
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-create_failed:
-    /* TODO: close the db ref. */
-    return 0;
-}    
-static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd)
-{
-    int fd;
-    struct stat stat;
-    vdisk_t *gnbd_info;
-    blkif_response_t *rsp;
-
-    /* We expect one buffer only. */
-    if ( req->nr_segments != 1 )
-        goto err;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
-         (blkif_last_sect (req->frame_and_sects[0]) != 7) )
-        goto err;
-
-    /* loop for multiple gnbds would start here. */
-
-    gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-    gnbd_info[0].device   = gnbd->vdevice;
-    gnbd_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
-    gnbd_info[0].capacity = gnbd_sectors(gnbd->gh);
-
-    printf("[SECTORS] %llu", gnbd_info[0].capacity);
-
-    //if (gnbd_info[0].capacity == 0)
-    //    gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too.
-
-    DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", gnbd_info[0].device,
-            gnbd_info[0].capacity);
-
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = 1; /* number of disks */
-
-    return  BLKTAP_RESPOND;
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-int gnbd_request(blkif_request_t *req)
-{
-    struct gnbd_handle *gh;
-    u64 sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    
-    if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) {
-        printf("Data request for unknown domain!!! %d\n", dom);
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = req->operation;
-        rsp->status = BLKIF_RSP_ERROR;
-        return BLKTAP_RESPOND;
-    }
-    
-    gh = gnbds[dom]->gh;
-    
-    switch (req->operation) 
-    {
-    case BLKIF_OP_PROBE:
-    {
-        printf("PROBE!\n");
-        if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) {
-            printf("Already have a PROBE outstanding!\n");
-            goto err;
-        }
-        
-        if ( gnbds[dom]->gh_state == GH_DISCONNECTED )
-        {
-            /* need to defer until we are connected. */
-            printf("Deferring PROBE!\n");
-            idx = ID_TO_IDX(req->id);
-            memcpy(&pending_list[idx].req, req, sizeof(*req));
-            ASSERT(pending_list[idx].count == 0);
-            pending_list[idx].count = 1;
-            
-            gnbds[dom]->probe_idx = idx;
-            gnbds[dom]->gh_state  = GH_PROBEWAITING;
-
-            return BLKTAP_STOLEN;
-        }
-            
-        
-        return gnbd_blkif_probe(req, gnbds[dom]);
-    }    
-    case BLKIF_OP_WRITE:
-    {
-        unsigned long size;
-        
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_count++; /* dbg */
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT));
-                        
-            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            
-            ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx);
-            if (ret) {
-                printf("gnbd error on WRITE\n");
-                goto err;
-            }
-        }
-//printf("[WR] < %lu\n", (unsigned long)idx);
-        
-        return BLKTAP_STOLEN;
-    }
-    case BLKIF_OP_READ:
-    {
-        unsigned long size;
-        
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_count++; /* dbg */
-            
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector  = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT));
-            
-            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            
-            ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx);
-            if (ret) {
-                printf("gnbd error on READ\n");
-                goto err;
-            }
-            
-        }
-//printf("[RD] < %lu\n", (unsigned long)idx);
-        
-        return BLKTAP_STOLEN;
-    }
-    }
-    
-    printf("Unknown block operation!\n");
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-/* the gnbd library terminates the request stream. _resp is a noop. */
-int gnbd_response(blkif_response_t *rsp)
-{   
-    return BLKTAP_PASS;
-}
-
-int gnbd_pollhook(int fd)
-{
-    int err;
-    struct gnbd_handle *gh;
-    blkif_request_t *req;
-    blkif_response_t *rsp;
-    unsigned long idx;
-    
-    gnbd_t *gnbd = get_gnbd_by_fd(fd);
-    
-    if (gnbd == NULL) {
-        printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd);
-        return -1;
-    }
-    gh = gnbd->gh;
-    err = gnbd_reply(gh);
-    switch (err) {
-    case GNBD_LOGIN_DONE:
-        if (gnbd->gh_state == GH_PROBEWAITING) {
-            req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req;
-            printf("[!] Sending deferred PROBE!\n");
-            gnbd_blkif_probe(req, gnbd);
-            pending_list[gnbd->probe_idx].count = 0;
-            rsp = (blkif_response_t *)req;
-            blktap_inject_response(rsp);
-        }
-        gnbd->gh_state = GH_CONNECTED;
-        printf("GNBD_LOGIN_DONE (%d)\n", fd); 
-        break;
-
-    case GNBD_REQUEST_DONE: /* switch to idx */
-        idx = gnbd_finished_request(gh);
-        req = (blkif_request_t *)&pending_list[idx].req;
-        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
-            printf("gnbd returned a bad cookie (%lu)!\n", idx);
-            break;
-        }
-        
-        pending_list[idx].count--;
-        
-        if (pending_list[idx].count == 0) {
-            blkif_request_t tmp = *req;
-            pending_count--; /* dbg */
-            rsp = (blkif_response_t *)req;
-            rsp->id = tmp.id;
-            rsp->operation = tmp.operation;
-            rsp->status = BLKIF_RSP_OKAY;
-            blktap_inject_response(rsp);
-/*
-if (rsp->operation == BLKIF_OP_READ) {
-printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-} else if (rsp->operation == BLKIF_OP_WRITE) {
-printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-} else  {
-printf("[??] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-}
-*/
-        }
-        break;
-        
-    case GNBD_CONTINUE:
-        break;
-        
-    case 0:
-        break;
-        
-    default:
-        printf("gnbd_reply error");
-        break;
-    }
-    return 0;
-}
-
-void gnbd_init(void)
-{   
-    int i;
-    
-    for (i = 0; i < MAX_DOMS; i++)
-        gnbds[i] = NULL;
-    
-    for (i = 0; i < MAX_REQUESTS; i++)
-        pending_list[i].count = 0; 
-    
-    printf("GNBD image plugin initialized\n");
-}
-
diff --git a/tools/blktap/blkgnbdlib.h b/tools/blktap/blkgnbdlib.h
deleted file mode 100644 (file)
index b95d240..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* blkgnbdlib.h
- *
- * gndb image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int gnbd_control(control_msg_t *msg);
-int gnbd_request(blkif_request_t *req);
-int gnbd_response(blkif_response_t *rsp); /* noop */
-void gnbd_init(void);
diff --git a/tools/blktap/blkimg.c b/tools/blktap/blkimg.c
deleted file mode 100644 (file)
index fc746ad..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    image_init();
-    
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_listen();
-    
-    return 0;
-}
diff --git a/tools/blktap/blkimglib.c b/tools/blktap/blkimglib.c
deleted file mode 100644 (file)
index 075a2d9..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-/* blkimglib.c
- *
- * file image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>       
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <errno.h>
-#include "blktaplib.h"
-
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_DOMS        1024
-#define MAX_IMGNAME_LEN  255
-#define AMORFS_DEV     61440
-#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
-#define SECTOR_SHIFT       9
-                                                                                
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-                                                                                
-
-typedef struct {
-    /* These need to turn into an array/rbtree for multi-disk support. */
-    FILE *img;
-    u64  fsid;
-    char imgname[MAX_IMGNAME_LEN];
-    blkif_vdev_t   vdevice;
-} image_t;
-
-image_t         *images[MAX_DOMS];
-blkif_request_t *reread_list[MAX_REQUESTS];
-
-int image_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    DB      *db;
-    int      ret;
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
-                ((blkif_be_create_t *)msg->msg)->domid,
-                ((blkif_be_create_t *)msg->msg)->blkif_handle);
-        domid = ((blkif_be_create_t *)msg->msg)->domid;
-        if (images[domid] != NULL) {
-            printf("attempt to connect from an existing dom!\n");
-            return 0;
-        }
-        
-        images[domid] = (image_t *)malloc(sizeof(image_t));
-        if (images[domid] == NULL) {
-            printf("error allocating image record.\n");
-            return 0;
-        }
-        
-        images[domid]->img  = NULL;
-        images[domid]->fsid = 0;
-        
-        printf("Image connected.\n");
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
-                ((blkif_be_destroy_t *)msg->msg)->domid,
-                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-        
-        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
-        if (images[domid] != NULL) {
-            if (images[domid]->img != NULL)
-                fclose( images[domid]->img );
-            free( images[domid] );
-            images[domid] = NULL;
-        }
-        break;  
-    case CMSG_BLKIF_BE_VBD_GROW:
-    {
-        blkif_be_vbd_grow_t *grow;
-        
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
-                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
-        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
-                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
-        grow = (blkif_be_vbd_grow_t *)msg->msg;
-        domid = grow->domid;
-        if (images[domid] == NULL) {
-            printf("VBD_GROW on unconnected domain!\n");
-            return 0;
-        }
-        
-        if (grow->extent.device != AMORFS_DEV) {
-            printf("VBD_GROW on non-amorfs device!\n");
-            return 0;
-        }
-        
-        /* TODO: config support for arbitrary image files/modes. */
-        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
-        
-        images[domid]->fsid   = grow->extent.sector_start;
-        images[domid]->vdevice = grow->vdevice; 
-        images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+");
-        if (images[domid]->img == NULL) { 
-            printf("Couldn't open image file!\n");
-            return 0;
-        }
-        
-        printf("Image file opened. (%s)\n", images[domid]->imgname);
-        break;
-    }    
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-create_failed:
-    /* TODO: close the db ref. */
-    return 0;
-}    
-int image_request(blkif_request_t *req)
-{
-    FILE *img;
-    u64 sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    
-    if ((images[dom] == NULL) || (images[dom]->img == NULL)) {
-        printf("Data request for unknown domain!!! %d\n", dom);
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = req->operation;
-        rsp->status = BLKIF_RSP_ERROR;
-        return BLKTAP_RESPOND;
-    }
-    
-    img = images[dom]->img;
-    
-    switch (req->operation) 
-    {
-    case BLKIF_OP_PROBE:
-    {
-        int fd;
-        struct stat stat;
-        vdisk_t *img_info;
-        
-        
-        /* We expect one buffer only. */
-        if ( req->nr_segments != 1 )
-            goto err;
-                                                                                
-        /* Make sure the buffer is page-sized. */
-        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
-             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
-            goto err;
-
-        /* loop for multiple images would start here. */
-        
-        fd = fileno(img);
-        if (fd == -1) {
-            printf("Couldn't get image fd in PROBE!\n");
-            goto err;
-        }
-        
-        ret = fstat(fd, &stat);
-        if (ret != 0) {
-            printf("Couldn't stat image in PROBE!\n");
-            goto err;
-        }
-        
-        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-        img_info[0].device   = images[dom]->vdevice;
-        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
-        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
-        
-        if (img_info[0].capacity == 0)
-            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
-        
-        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
-                img_info[0].capacity);
-        
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_PROBE;
-        rsp->status = 1; /* number of disks */
-        
-        return  BLKTAP_RESPOND;
-    }    
-    case BLKIF_OP_WRITE:
-    {
-        unsigned long size;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
-            if (ret != 0) {
-                printf("fseek error on WRITE\n");
-                goto err;
-            }
-            
-            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT));
-                        
-            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            ret = fwrite(spage, size << SECTOR_SHIFT, 1, img);
-            if (ret != 1) {
-                printf("fwrite error on WRITE (%d)\n", errno);
-                goto err;
-            }
-        }
-        
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_WRITE;
-        rsp->status = BLKIF_RSP_OKAY;
-        
-        return BLKTAP_RESPOND;
-    }
-    case BLKIF_OP_READ:
-    {
-        unsigned long size;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector  = req->sector_number + (8*i);
-            
-            size = blkif_last_sect (req->frame_and_sects[i]) -
-                   blkif_first_sect(req->frame_and_sects[i]) + 1;
-            
-            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
-            if (ret != 0) {
-                printf("fseek error on READ\n");
-                goto err;
-            }
-        
-            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
-                    req->sector_number, sector, 
-                    blkif_first_sect(req->frame_and_sects[i]),
-                    blkif_last_sect (req->frame_and_sects[i]),
-                    (long)(sector << SECTOR_SHIFT));
-            
-            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-            ret = fread(dpage, size << SECTOR_SHIFT, 1, img);
-            if (ret != 1) {
-                printf("fread error on READ\n");
-                goto err;
-            }
-        }
-
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_READ;
-        rsp->status = BLKIF_RSP_OKAY;
-        return BLKTAP_RESPOND;
-    }
-    }
-    
-    printf("Unknow block operation!\n");
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int image_response(blkif_response_t *rsp)
-{   
-    return BLKTAP_PASS;
-}
-
-void image_init(void)
-{
-    int i;
-    
-    for (i = 0; i < MAX_DOMS; i++)
-        images[i] = NULL;
-}
-
diff --git a/tools/blktap/blkimglib.h b/tools/blktap/blkimglib.h
deleted file mode 100644 (file)
index 1bc597f..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* blkimglib.h
- *
- * file image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int image_control(control_msg_t *msg);
-int image_request(blkif_request_t *req);
-int image_response(blkif_response_t *rsp); /* noop */
-void image_init(void);
diff --git a/tools/blktap/block-async.c b/tools/blktap/block-async.c
new file mode 100755 (executable)
index 0000000..6f26071
--- /dev/null
@@ -0,0 +1,404 @@
+/* block-async.c\r
+ * \r
+ * Asynchronous block wrappers for parallax.\r
+ */\r
\r
\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <pthread.h>\r
+#include "block-async.h"\r
+#include "blockstore.h"\r
+#include "vdi.h"\r
+\r
+\r
+#if 0\r
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )\r
+#else\r
+#define DPRINTF(_f, _a...) ((void)0)\r
+#endif\r
+\r
+/* We have a queue of outstanding I/O requests implemented as a \r
+ * circular producer-consumer ring with free-running buffers.\r
+ * to allow reordering, this ring indirects to indexes in an \r
+ * ring of io_structs.\r
+ * \r
+ * the block_* calls may either add an entry to this ring and return, \r
+ * or satisfy the request immediately and call the callback directly.\r
+ * None of the io calls in parallax should be nested enough to worry \r
+ * about stack problems with this approach.\r
+ */\r
+\r
+struct read_args {\r
+       u64 addr;\r
+};\r
+\r
+struct write_args {\r
+       u64   addr;\r
+       char *block;\r
+};\r
+\r
+struct alloc_args {\r
+       char *block;\r
+};\r
\r
+struct pending_io_req {\r
+       enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;\r
+       union {\r
+               struct read_args  r;\r
+               struct write_args w;\r
+               struct alloc_args a;\r
+       } u;\r
+       io_cb_t cb;\r
+       void *param;\r
+};\r
+\r
+void radix_lock_init(struct radix_lock *r)\r
+{\r
+       int i;\r
+       \r
+       pthread_mutex_init(&r->lock, NULL);\r
+       for (i=0; i < 1024; i++) {\r
+               r->lines[i] = 0;\r
+               r->waiters[i] = NULL;\r
+               r->state[i] = ANY;\r
+       }\r
+}\r
+\r
+/* maximum outstanding I/O requests issued asynchronously */\r
+/* must be a power of 2.*/\r
+#define MAX_PENDING_IO 1024 //1024\r
+\r
+/* how many threads to concurrently issue I/O to the disk. */\r
+#define IO_POOL_SIZE   10 //10\r
+\r
+static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];\r
+static int pending_io_list[MAX_PENDING_IO];\r
+static unsigned long io_prod = 0, io_cons = 0, io_free = 0;\r
+#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))\r
+#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)\r
+#define PENDING_IO_ENT(_x) \\r
+       (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])\r
+#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)\r
+#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)\r
+static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;\r
+static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;\r
+\r
+static void init_pending_io(void)\r
+{\r
+       int i;\r
+       \r
+       for (i=0; i<MAX_PENDING_IO; i++)\r
+               pending_io_list[i] = i;\r
+               \r
+} \r
+\r
+void block_read(u64 addr, io_cb_t cb, void *param)\r
+{\r
+       struct pending_io_req *req;\r
+       \r
+       pthread_mutex_lock(&pending_io_lock);\r
+       assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+       req = PENDING_IO_ENT(io_prod++);\r
+       DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);\r
+       req->op = IO_READ;\r
+       req->u.r.addr = addr;\r
+       req->cb = cb;\r
+       req->param = param;\r
+       \r
+    pthread_cond_signal(&pending_io_cond);\r
+       pthread_mutex_unlock(&pending_io_lock); \r
+}\r
+\r
+\r
+void block_write(u64 addr, char *block, io_cb_t cb, void *param)\r
+{\r
+       struct pending_io_req *req;\r
+       \r
+       pthread_mutex_lock(&pending_io_lock);\r
+       assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+       req = PENDING_IO_ENT(io_prod++);\r
+       DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);\r
+       req->op = IO_WRITE;\r
+       req->u.w.addr  = addr;\r
+       req->u.w.block = block;\r
+       req->cb = cb;\r
+       req->param = param;\r
+       \r
+    pthread_cond_signal(&pending_io_cond);\r
+       pthread_mutex_unlock(&pending_io_lock); \r
+}\r
+\r
+\r
+void block_alloc(char *block, io_cb_t cb, void *param)\r
+{\r
+       struct pending_io_req *req;\r
+       \r
+       pthread_mutex_lock(&pending_io_lock);\r
+       assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+       req = PENDING_IO_ENT(io_prod++);\r
+       req->op = IO_ALLOC;\r
+       req->u.a.block = block;\r
+       req->cb = cb;\r
+       req->param = param;\r
+       \r
+    pthread_cond_signal(&pending_io_cond);\r
+       pthread_mutex_unlock(&pending_io_lock); \r
+}\r
+\r
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+       struct io_ret ret;\r
+       pthread_mutex_lock(&r->lock);\r
+       \r
+       if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {\r
+               r->lines[row]++;\r
+               r->state[row] = READ;\r
+               DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);\r
+               pthread_mutex_unlock(&r->lock);\r
+               ret.type = IO_INT_T;\r
+               ret.u.i = 0;\r
+               cb(ret, param);\r
+       } else {\r
+               struct radix_wait **rwc;\r
+               struct radix_wait *rw = \r
+                       (struct radix_wait *) malloc (sizeof(struct radix_wait));\r
+               DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);\r
+               rw->type  = RLOCK;\r
+               rw->param = param;\r
+               rw->cb    = cb;\r
+               rw->next  = NULL;\r
+               /* append to waiters list. */\r
+               rwc = &r->waiters[row];\r
+               while (*rwc != NULL) rwc = &(*rwc)->next;\r
+               *rwc = rw;\r
+               pthread_mutex_unlock(&r->lock);\r
+               return;\r
+       }\r
+}\r
+\r
+\r
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+       struct io_ret ret;\r
+       pthread_mutex_lock(&r->lock);\r
+       \r
+       /* the second check here is redundant -- just here for debugging now. */\r
+       if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {\r
+               r->state[row] = STOP;\r
+               r->lines[row] = -1;\r
+               DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);\r
+               pthread_mutex_unlock(&r->lock);\r
+               ret.type = IO_INT_T;\r
+               ret.u.i = 0;\r
+               cb(ret, param);\r
+       } else {\r
+               struct radix_wait **rwc;\r
+               struct radix_wait *rw = \r
+                       (struct radix_wait *) malloc (sizeof(struct radix_wait));\r
+               DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);\r
+               rw->type  = WLOCK;\r
+               rw->param = param;\r
+               rw->cb    = cb;\r
+               rw->next  = NULL;\r
+               /* append to waiters list. */\r
+               rwc = &r->waiters[row];\r
+               while (*rwc != NULL) rwc = &(*rwc)->next;\r
+               *rwc = rw;\r
+               pthread_mutex_unlock(&r->lock);\r
+               return;\r
+       }\r
+       \r
+}\r
+\r
+/* called with radix_lock locked and lock count of zero. */\r
+static void wake_waiters(struct radix_lock *r, int row)\r
+{\r
+       struct pending_io_req *req;\r
+       struct radix_wait *rw;\r
+       \r
+       DPRINTF("prewake\n");\r
+       if (r->lines[row] != 0) return;\r
+       if (r->waiters[row] == NULL) {DPRINTF("nowaiters!\n");return;} \r
+       \r
+       DPRINTF("wake\n");\r
+       if (r->waiters[row]->type == WLOCK) {\r
+               rw = r->waiters[row];\r
+               pthread_mutex_lock(&pending_io_lock);\r
+               assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+               req = PENDING_IO_ENT(io_prod++);\r
+               DPRINTF("Produce (WWAKE) %lu (%p)\n", io_prod - 1, req);\r
+               req->op    = IO_WWAKE;\r
+               req->cb    = rw->cb;\r
+               req->param = rw->param;\r
+               r->lines[row] = -1; /* write lock the row. */\r
+               r->state[row] = STOP;\r
+               r->waiters[row] = rw->next;\r
+               free(rw);\r
+               pthread_mutex_unlock(&pending_io_lock);\r
+       } else /* RLOCK */ {\r
+               while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {\r
+                       rw = r->waiters[row];\r
+                       pthread_mutex_lock(&pending_io_lock);\r
+                       assert(CAN_PRODUCE_PENDING_IO);\r
+       \r
+                       req = PENDING_IO_ENT(io_prod++);\r
+                       DPRINTF("Produce (RWAKE) %lu (%p)\n", io_prod - 1, req);\r
+                       req->op    = IO_RWAKE;\r
+                       req->cb    = rw->cb;\r
+                       req->param = rw->param;\r
+                       r->lines[row]++; /* read lock the row. */\r
+                       r->state[row] = READ; \r
+                       r->waiters[row] = rw->next;\r
+                       free(rw);\r
+                       pthread_mutex_unlock(&pending_io_lock);\r
+               }\r
+               if (r->waiters[row] != NULL) /* There is a write queued still */\r
+                       r->state[row] = STOP;\r
+       }       \r
+       \r
+       DPRINTF("wakedone\n");\r
+       DPRINTF("prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+       pthread_mutex_lock(&pending_io_lock);\r
+    pthread_cond_signal(&pending_io_cond);\r
+       pthread_mutex_unlock(&pending_io_lock);\r
+}\r
+\r
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+       struct io_ret ret;\r
+       \r
+       pthread_mutex_lock(&r->lock);\r
+       assert(r->lines[row] > 0); /* try to catch misuse. */\r
+       r->lines[row]--;\r
+       DPRINTF("RUNLOCK: %3d (row: %d)\n", r->lines[row], row);\r
+       if (r->lines[row] == 0) {\r
+               r->state[row] = ANY;\r
+               wake_waiters(r, row);\r
+       }\r
+       pthread_mutex_unlock(&r->lock);\r
+       cb(ret, param);\r
+}\r
+\r
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+       struct io_ret ret;\r
+       \r
+       pthread_mutex_lock(&r->lock);\r
+       assert(r->lines[row] == -1); /* try to catch misuse. */\r
+       r->lines[row] = 0;\r
+       r->state[row] = ANY;\r
+       DPRINTF("WUNLOCK: %3d (row: %d)\n", r->lines[row], row);\r
+       wake_waiters(r, row);\r
+       pthread_mutex_unlock(&r->lock);\r
+       cb(ret, param);\r
+}\r
+\r
+/* consumer calls */\r
+static void do_next_io_req(struct pending_io_req *req)\r
+{\r
+       struct io_ret          ret;\r
+       void  *param;\r
+       \r
+       switch (req->op) {\r
+       case IO_READ:\r
+               ret.type = IO_BLOCK_T;\r
+               ret.u.b  = readblock(req->u.r.addr);\r
+               break;\r
+       case IO_WRITE:\r
+               ret.type = IO_INT_T;\r
+               ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);\r
+               DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);\r
+               break;\r
+       case IO_ALLOC:\r
+               ret.type = IO_ADDR_T;\r
+               ret.u.a  = allocblock(req->u.a.block);\r
+               break;\r
+       case IO_RWAKE:\r
+               DPRINTF("WAKE DEFERRED RLOCK!\n");\r
+               ret.type = IO_INT_T;\r
+               ret.u.i  = 0;\r
+               break;\r
+       case IO_WWAKE:\r
+               DPRINTF("WAKE DEFERRED WLOCK!\n");\r
+               ret.type = IO_INT_T;\r
+               ret.u.i  = 0;\r
+               break;\r
+       default:\r
+               DPRINTF("Unknown IO operation on pending list!\n");\r
+               return;\r
+       }\r
+       \r
+       param = req->param;\r
+       DPRINTF("freeing idx %d to slot %lu.\n", PENDING_IO_IDX(req), PENDING_IO_MASK(io_free));\r
+       pthread_mutex_lock(&pending_io_lock);\r
+       pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);\r
+       DPRINTF("       : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+       pthread_mutex_unlock(&pending_io_lock);\r
+       \r
+       assert(req->cb != NULL);\r
+       req->cb(ret, param);\r
+               \r
+}\r
+\r
+void *io_thread(void *param) \r
+{\r
+       int tid;\r
+       struct pending_io_req *req;\r
+       \r
+       /* Set this thread's tid. */\r
+    tid = *(int *)param;\r
+    free(param);\r
+    \r
+    DPRINTF("IOT %2d started.\n", tid);\r
+    \r
+start:\r
+    pthread_mutex_lock(&pending_io_lock);\r
+    while (io_prod == io_cons) {\r
+        pthread_cond_wait(&pending_io_cond, &pending_io_lock);\r
+    }\r
+    \r
+    if (io_prod == io_cons) {\r
+        /* unnecessary wakeup. */\r
+        pthread_mutex_unlock(&pending_io_lock);\r
+        goto start;\r
+    }\r
+    \r
+       req = PENDING_IO_ENT(io_cons++);\r
+       DPRINTF("IOT %2d has req %04d(%p).\n", tid, PENDING_IO_IDX(req), req);\r
+       DPRINTF("       : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+       pthread_mutex_unlock(&pending_io_lock);\r
+       \r
+       \r
+    do_next_io_req(req);\r
+    \r
+       goto start;\r
+       \r
+}\r
+\r
+static pthread_t io_pool[IO_POOL_SIZE];\r
+void start_io_threads(void)\r
+\r
+{      \r
+       int i, tid=0;\r
+       \r
+        for (i=0; i < IO_POOL_SIZE; i++) {\r
+        int ret, *t;\r
+        t = (int *)malloc(sizeof(int));\r
+        *t = tid++;\r
+        ret = pthread_create(&io_pool[i], NULL, io_thread, t);\r
+        if (ret != 0) printf("Error starting thread %d\n", i);\r
+    }\r
+       \r
+}\r
+\r
+void init_block_async(void)\r
+{\r
+       init_pending_io();\r
+       start_io_threads();\r
+}\r
diff --git a/tools/blktap/block-async.h b/tools/blktap/block-async.h
new file mode 100755 (executable)
index 0000000..b19d464
--- /dev/null
@@ -0,0 +1,69 @@
+/* block-async.h\r
+ * \r
+ * Asynchronous block wrappers for parallax.\r
+ */\r
\r
+#ifndef _BLOCKASYNC_H_\r
+#define _BLOCKASYNC_H_\r
+\r
+#include <assert.h>\r
+#include <xc.h>\r
+#include "vdi.h"\r
+\r
+struct io_ret\r
+{\r
+       enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;\r
+       union {\r
+               u64   a;\r
+               char *b;\r
+               int   i;\r
+       } u;\r
+};\r
+\r
+typedef void (*io_cb_t)(struct io_ret r, void *param);\r
+\r
+/* per-vdi lock structures to make sure requests run in a safe order. */\r
+struct radix_wait {\r
+       enum {RLOCK, WLOCK} type;\r
+       io_cb_t  cb;\r
+       void    *param;\r
+       struct radix_wait *next;\r
+};\r
+\r
+struct radix_lock {\r
+       pthread_mutex_t lock;\r
+       int                    lines[1024];\r
+       struct radix_wait     *waiters[1024];\r
+       enum {ANY, READ, STOP} state[1024];\r
+};\r
+void radix_lock_init(struct radix_lock *r);\r
+\r
+void block_read(u64 addr, io_cb_t cb, void *param);\r
+void block_write(u64 addr, char *block, io_cb_t cb, void *param);\r
+void block_alloc(char *block, io_cb_t cb, void *param);\r
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void init_block_async(void);\r
+\r
+static inline u64 IO_ADDR(struct io_ret r)\r
+{\r
+       assert(r.type == IO_ADDR_T);\r
+       return r.u.a;\r
+}\r
+\r
+static inline char *IO_BLOCK(struct io_ret r)\r
+{\r
+       assert(r.type == IO_BLOCK_T);\r
+       return r.u.b;\r
+}\r
+\r
+static inline int IO_INT(struct io_ret r)\r
+{\r
+       assert(r.type == IO_INT_T);\r
+       return r.u.i;\r
+}\r
+\r
+\r
+#endif //_BLOCKASYNC_H_\r
diff --git a/tools/blktap/blockstore-tls.c b/tools/blktap/blockstore-tls.c
deleted file mode 100644 (file)
index 67808d7..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.c
- *
- * Simple block store interface
- *
- */
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "blockstore.h"
-#include "parallax-threaded.h"
-
-/*static int block_fp = -1;*/
-static int fd_list[READ_POOL_SIZE+1];
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-
-void *readblock(u64 id) 
-{
-    void *block;
-    int tid = (int)pthread_getspecific(tid_key);
-    
-    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        goto err;
-    }
-    if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    if (read(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        free(block);
-        goto err;
-    }
-    return block;
-    
-err:
-    return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(u64 id, void *block) 
-{
-    int tid = (int)pthread_getspecific(tid_key);
-    
-    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        goto err;
-    }
-    if (write(fd_list[tid], block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        goto err;
-    }
-    return 0;
-
-err:
-    return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-
-u64 allocblock(void *block) 
-{
-    u64 lb;
-    off64_t pos;
-    int tid = (int)pthread_getspecific(tid_key);
-
-    pos = lseek64(fd_list[tid], 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        goto err;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        goto err;
-    }
-    if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        goto err;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-    
-    return lb;
-    
-err:
-    return 0;
-    
-}
-
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock() 
-{
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) 
-{
-    if (block != NULL)
-        free(block);
-}
-
-
-int __init_blockstore(void)
-{
-    int i;
-    
-    for (i=0; i<(READ_POOL_SIZE+1); i++) {
-        
-        fd_list[i] = open("blockstore.dat", 
-                O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-        if (fd_list[i] < 0) {
-            perror("open");
-            return -1;
-        }
-    }
-    return 0;
-}
index 36903fe09e5ec669c3365cfdbeb5d0034f68d36c..a9dde6e4614e8797985a233d67d9a7bbacf0ef7d 100644 (file)
@@ -19,7 +19,7 @@
 #include <pthread.h>
 #include "parallax-threaded.h"
 
-#define BLOCKSTORE_REMOTE
+//#define BLOCKSTORE_REMOTE
 //#define BSDEBUG
 
 #define RETRY_TIMEOUT 1000000 /* microseconds */
@@ -942,7 +942,8 @@ u64 allocblock_hint(void *block, u64 hint) {
 void *readblock(u64 id) {
     void *block;
     int block_fp;
-    
+   
+//printf("readblock(%llu)\n", id); 
     block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
 
     if (block_fp < 0) {
@@ -1336,6 +1337,7 @@ int __init_blockstore(void)
 void __exit_blockstore(void)
 {
     int i;
+#ifdef BLOCKSTORE_REMOTE
     pthread_mutex_destroy(&ptmutex_recv);
     pthread_mutex_destroy(&ptmutex_luid);
     pthread_mutex_destroy(&ptmutex_queue);
@@ -1345,4 +1347,5 @@ void __exit_blockstore(void)
         pthread_mutex_destroy(&(pool_thread[i].ptmutex));
         pthread_cond_destroy(&(pool_thread[i].ptcv));
     }
+#endif
 }
diff --git a/tools/blktap/libgnbd/Makefile b/tools/blktap/libgnbd/Makefile
deleted file mode 100644 (file)
index 4297c02..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-
-CFLAGS += -Wall -Werror -g
-LDFLAGS += -g
-
-libgnbd.a: libgnbd.o
-       $(AR) r $@ $<
-
-gnbdtest: gnbdtest.o libgnbd.a
diff --git a/tools/blktap/libgnbd/gnbdtest.c b/tools/blktap/libgnbd/gnbdtest.c
deleted file mode 100644 (file)
index bc39159..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-
-#include <err.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/poll.h>
-
-#include "libgnbd.h"
-
-#define PRINTF(x) printf x
-#if 0
-#define DFPRINTF(x...) fprintf(stderr, ##x)
-#define DPRINTF(x) DFPRINTF x
-#else
-#define DPRINTF(x)
-#endif
-
-static unsigned char buf1[8 << 9];
-static unsigned char buf2[8 << 9];
-static unsigned char buf3[8 << 9];
-
-int
-main(int argc, char **argv)
-{
-       struct gnbd_handle *gh;
-       struct pollfd pfd[1];
-       int err, tout;
-
-       gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1",
-           "arcadians.cl.cam.ac.uk");
-       if (gh == NULL)
-               errx(1, "gnbd_setup");
-
-       memset(pfd, 0, sizeof(pfd));
-       pfd[0].fd = gnbd_fd(gh);
-       pfd[0].events = POLLIN;
-
-       while ((tout = poll(pfd, 1, 0)) >= 0) {
-               if (tout == 0)
-                       continue;
-               DPRINTF(("event\n"));
-               if (pfd[0].revents) {
-                       err = gnbd_reply(gh);
-                       pfd[0].events = POLLIN;
-                       switch (err) {
-                       case GNBD_LOGIN_DONE:
-                               DPRINTF(("sectors: %08llu\n",
-                                           gnbd_sectors(gh)));
-                               err = gnbd_read(gh, 8, 8, buf2, 1);
-                               if (err)
-                                       warnx("gnbd_read");
-                               err = gnbd_read(gh, 0, 8, buf1, 0);
-                               if (err)
-                                       warnx("gnbd_read");
-                               err = gnbd_read(gh, 16, 8, buf3, 2);
-                               if (err)
-                                       warnx("gnbd_read");
-                               break;
-                       case GNBD_REQUEST_DONE:
-                               DPRINTF(("request done %ld\n",
-                                           gnbd_finished_request(gh)));
-                               if (0 && gnbd_finished_request(gh) == 0) {
-                                       write(1, buf1, 8 << 9);
-                                       err = gnbd_write(gh, 0, 8, buf1, 10);
-                                       if (err)
-                                               warnx("gnbd_write");
-                               }
-                               break;
-                       case GNBD_CONTINUE:
-                               DPRINTF(("continue\n"));
-                               break;
-                       case 0:
-                               break;
-                       case GNBD_CONTINUE_WRITE:
-                               DPRINTF(("continue write\n"));
-                               pfd[0].events |= POLLOUT;
-                               break;
-                       default:
-                               warnx("gnbd_reply error");
-                               break;
-                       }
-                       DPRINTF(("got gnbd reply\n"));
-               }
-       }
-
-       return 0;
-}
diff --git a/tools/blktap/libgnbd/libgnbd.c b/tools/blktap/libgnbd/libgnbd.c
deleted file mode 100644 (file)
index 2856ca3..0000000
+++ /dev/null
@@ -1,647 +0,0 @@
-/* libgnbd.c
- * 
- * gnbd client library
- *
- * Copyright (c) 2005, Christian Limpach
- */
-  
-#include <byteswap.h>
-#include <endian.h>
-#include <err.h>
-#include <errno.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-
-#include <stdio.h>
-
-#include "libgnbd.h"
-
-#define        PROTOCOL_VERSION        2
-
-#define        EXTERN_KILL_GSERV_REQ   5
-#define        EXTERN_LOGIN_REQ        6
-
-#define        GNBD_REQUEST_MAGIC      0x37a07e00
-#define        GNBD_KEEP_ALIVE_MAGIC   0x5b46d8c2
-#define        GNBD_REPLY_MAGIC        0x41f09370
-
-enum {
-       GNBD_CMD_READ = 0,
-       GNBD_CMD_WRITE = 1,
-       GNBD_CMD_DISC = 2,
-       GNBD_CMD_PING = 3
-};
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-#define htonll(x) (x)
-#define ntohll(x) (x)
-#endif
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define htonll(x) bswap_64(x)
-#define ntohll(x) bswap_64(x)
-#endif
-
-#define PRINTF(x) printf x
-#if 0
-#define DFPRINTF(x...) fprintf(stderr, ##x)
-#define DPRINTF(x) DFPRINTF x
-#else
-#define DPRINTF(x)
-#endif
-
-struct gnbd_request {
-       struct gnbd_request     *gr_next;
-       unsigned char           *gr_buf;
-       ssize_t                 gr_size;
-       ssize_t                 gr_done;
-       unsigned long           gr_cookie;
-};
-
-struct gnbd_handle {
-       int                     gh_fd;
-       unsigned int            gh_flags;
-       uint64_t                gh_sectors;
-       char                    gh_devname[32];
-       char                    gh_nodename[65];
-       struct sockaddr_in      gh_sin;
-       struct gnbd_request     *gh_outstanding_requests;
-       struct gnbd_request     **gh_outstanding_requests_last;
-       struct gnbd_request     *gh_incoming_request;
-       unsigned long           gh_finished_request;
-};
-#define        GHF_EXPECT_KILL_GSERV_REPLY     0x0001
-#define        GHF_EXPECT_LOGIN_REPLY          0x0002
-#define        GHF_INCOMING_REQUEST            0x0004
-
-struct device_req {
-       char            name[32];
-};
-
-struct node_req {
-       char            node_name[65];
-};
-
-struct login_req {
-        uint64_t       timestamp;
-        uint16_t       version;
-        uint8_t                pad[6];
-        char           devname[32];
-};
-
-struct login_reply {
-        uint64_t       sectors;
-        uint16_t       version;
-        uint8_t                err;
-        uint8_t                pad[5];
-};
-
-struct gnbd_server_request {
-       uint32_t        magic;
-       uint32_t        type;
-       char            handle[8];
-       uint64_t        from;
-       uint32_t        len;
-} __attribute__ ((packed));
-
-struct gnbd_server_reply {
-       uint32_t        magic;
-       uint32_t        error;
-       char            handle[8];
-} __attribute__ ((packed));
-
-static int
-read_buf(int fd, void *buf, size_t count, size_t *read_count)
-{
-       int err;
-
-       err = read(fd, buf, count);
-       if (read_count) {
-               if (err >= 0)
-                       *read_count = err;
-       } else if (err != count)
-               return EINTR;   /* xxx */
-       return err < 0;
-}
-
-static int
-read_4(int fd, unsigned long *val)
-{
-       unsigned long buf;
-       int err;
-
-       err = read_buf(fd, &buf, sizeof(buf), NULL);
-       if (err == 0)
-               *val = ntohl(buf);
-       return err;
-}
-
-static int
-write_buf(int fd, void *buf, size_t count)
-{
-       int err;
-
-       err = write(fd, buf, count);
-       return err < 0;
-}
-
-static int
-write_4(int fd, unsigned long val)
-{
-       unsigned long buf;
-       int err;
-
-       buf = htonl(val);
-       err = write_buf(fd, &buf, sizeof(buf));
-       return err;
-}
-
-
-static int
-socket_connect(struct gnbd_handle *gh)
-{
-       int err;
-
-       if (gh->gh_fd >= 0)
-               return 0;
-
-       gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0);
-       if (gh->gh_fd < 0) {
-               warn("socket");
-               return gh->gh_fd;
-       }
-
-       err = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin,
-           sizeof(gh->gh_sin));
-       if (err) {
-               warn("connect");
-               goto out;
-       }
-
-       return 0;
- out:
-       close (gh->gh_fd);
-       gh->gh_fd = -1;
-       return err;
-}
-
-static int
-socket_shutdown(struct gnbd_handle *gh)
-{
-
-       close (gh->gh_fd);
-       gh->gh_fd = -1;
-       return 0;
-}
-
-static int
-find_request(struct gnbd_handle *gh, struct gnbd_request *gr)
-{
-       struct gnbd_request **tmp;
-
-       for (tmp = &gh->gh_outstanding_requests; *tmp;
-            tmp = &(*tmp)->gr_next) {
-               if (*tmp == gr) {
-                       *tmp = (*tmp)->gr_next;
-                       if (*tmp == NULL)
-                               gh->gh_outstanding_requests_last = tmp;
-                       return 0;
-               }
-       }
-       return ENOENT;
-}
-
-static int
-kill_gserv(struct gnbd_handle *gh)
-{
-       struct device_req dr;
-       struct node_req nr;
-       int err;
-
-       DPRINTF(("gnbd_kill_gserv\n"));
-       err = socket_connect(gh);
-       if (err) {
-               warnx("socket_connect");
-               return err;
-       }
-
-       err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ);
-       if (err) {
-               warnx("send EXTERN_LOGIN_REQ failed");
-               goto out;
-       }
-
-       strncpy(dr.name, gh->gh_devname, sizeof(dr.name));
-       err = write_buf(gh->gh_fd, &dr, sizeof(dr));
-       if (err) {
-               warnx("send device_req failed");
-               goto out;
-       }
-
-       strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
-       err = write_buf(gh->gh_fd, &nr, sizeof(nr));
-       if (err) {
-               warnx("send node_req failed");
-               goto out;
-       }
-
-       gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY;
-       DPRINTF(("gnbd_kill_gserv ok\n"));
-
-       return 0;
- out:
-       socket_shutdown(gh);
-       return err;
-}
-
-static int
-login(struct gnbd_handle *gh)
-{
-       struct login_req lr;
-       struct node_req nr;
-       int err;
-       uint64_t timestamp;
-       struct timeval tv;
-
-       DPRINTF(("gnbd_login\n"));
-       err = socket_connect(gh);
-       if (err) {
-               warnx("socket_connect");
-               return err;
-       }
-
-       err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ);
-       if (err) {
-               warnx("send EXTERN_LOGIN_REQ failed");
-               goto out;
-       }
-
-       err = gettimeofday(&tv, NULL);
-       if (err) {
-               warnx("gettimeofday");
-               goto out;
-       }
-       timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
-
-       lr.timestamp = htonll(timestamp);
-       lr.version = htons(PROTOCOL_VERSION);
-       strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname));
-       err = write_buf(gh->gh_fd, &lr, sizeof(lr));
-       if (err) {
-               warnx("send login_req failed");
-               goto out;
-       }
-
-       strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
-       err = write_buf(gh->gh_fd, &nr, sizeof(nr));
-       if (err) {
-               warnx("send node_req failed");
-               goto out;
-       }
-
-       gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY;
-
-       DPRINTF(("gnbd_login ok\n"));
-       return 0;
- out:
-       socket_shutdown(gh);
-       return err;
-}
-
-static int
-kill_gserv_reply(struct gnbd_handle *gh)
-{
-       unsigned long reply;
-       int err;
-
-       DPRINTF(("read gnbd_kill_gserv_reply\n"));
-       err = read_4(gh->gh_fd, &reply);
-       if (err) {
-               warnx("read kill_gserv_reply failed");
-               return err;
-       }
-
-       if (reply && reply != ENODEV) {
-               warnx("kill gserv failed: %s", strerror(reply));
-               return reply;
-       }
-
-       gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY;
-       socket_shutdown(gh);
-
-       err = login(gh);
-       if (err)
-               warnx("gnbd_login");
-
-       return err;
-}
-
-static int
-login_reply(struct gnbd_handle *gh)
-{
-       struct login_reply lr;
-       int err;
-
-       DPRINTF(("read gnbd_login_reply\n"));
-       err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL);
-       if (err) {
-               warnx("read login_reply failed");
-               return err;
-       }
-
-       if (lr.err) {
-               if (lr.version) {
-                       warnx("gnbd version mismatch %04x != %04x",
-                           PROTOCOL_VERSION, ntohs(lr.version));
-                       return EINVAL;
-               }
-               warnx("login refused: %s", strerror(lr.err));
-               return lr.err;
-       }
-       gh->gh_sectors = ntohll(lr.sectors);
-
-       gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY;
-
-       return GNBD_LOGIN_DONE;
-}
-
-static int
-incoming_request(struct gnbd_handle *gh)
-{
-       struct gnbd_request *gr = gh->gh_incoming_request;
-       ssize_t done;
-       int err;
-
-       DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done,
-                   gr->gr_size));
-       err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done,
-           gr->gr_size - gr->gr_done, &done);
-       if (err)
-               goto out;
-
-       DPRINTF(("incoming_request: got %d\n", done));
-       gr->gr_done += done;
-       if (gr->gr_done == gr->gr_size) {
-               gh->gh_flags &= ~GHF_INCOMING_REQUEST;
-               gh->gh_finished_request = gr->gr_cookie;
-               free(gr);
-               return GNBD_REQUEST_DONE;
-       }
-
-       return GNBD_CONTINUE;
-
- out:
-       gh->gh_flags &= ~GHF_INCOMING_REQUEST;
-       gh->gh_finished_request = 0;
-       free(gr);
-       return err;
-}
-
-
-
-int
-gnbd_close(struct gnbd_handle *gh)
-{
-       int err;
-       struct gnbd_request **tmp;
-
-       for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next)
-               free(*tmp);
-
-       if (gh->gh_flags & GHF_INCOMING_REQUEST)
-               free(gh->gh_incoming_request);
-
-       err = close(gh->gh_fd);
-       if (err)
-               warnx("close");
-       free(gh);
-
-       return err;
-}
-
-int
-gnbd_fd(struct gnbd_handle *gh)
-{
-       return gh->gh_fd;
-}
-
-unsigned long
-gnbd_finished_request(struct gnbd_handle *gh)
-{
-       return gh->gh_finished_request;
-}
-
-int
-gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
-    unsigned char *buf, unsigned long cookie)
-{
-       struct gnbd_server_request gsr;
-       struct gnbd_request *gr;
-       int err;
-
-       gr = malloc(sizeof(struct gnbd_request));
-       if (gr == NULL)
-               return ENOMEM;
-       memset(gr, 0, sizeof(gr));
-
-       gr->gr_buf = buf;
-       gr->gr_size = count << 9;
-       gr->gr_done = 0;
-       gr->gr_cookie = cookie;
-
-       gsr.magic = htonl(GNBD_REQUEST_MAGIC);
-       gsr.type = htonl(GNBD_CMD_READ);
-       gsr.from = htonll(sector << 9);
-       gsr.len = htonl(gr->gr_size);
-       memset(gsr.handle, 0, sizeof(gsr.handle));
-       memcpy(gsr.handle, &gr, sizeof(gr));
-
-       err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
-       if (err) {
-               warnx("write_buf");
-               goto out;
-       }
-
-       *gh->gh_outstanding_requests_last = gr;
-       gh->gh_outstanding_requests_last = &gr->gr_next;
-
-       return 0;
-
- out:
-       free(gr);
-       return err;
-}
-
-int
-gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
-    unsigned char *buf, unsigned long cookie)
-{
-       struct gnbd_server_request gsr;
-       struct gnbd_request *gr;
-       int err;
-
-       gr = malloc(sizeof(struct gnbd_request));
-       if (gr == NULL)
-               return ENOMEM;
-       memset(gr, 0, sizeof(gr));
-
-       gr->gr_buf = buf;
-       gr->gr_size = count << 9;
-       gr->gr_done = 0;
-       gr->gr_cookie = cookie;
-
-       gsr.magic = htonl(GNBD_REQUEST_MAGIC);
-       gsr.type = htonl(GNBD_CMD_WRITE);
-       gsr.from = htonll(sector << 9);
-       gsr.len = htonl(gr->gr_size);
-       memset(gsr.handle, 0, sizeof(gsr.handle));
-       memcpy(gsr.handle, &gr, sizeof(gr));
-
-       err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
-       if (err) {
-               warnx("write_buf");
-               goto out;
-       }
-
-       /* XXX handle non-blocking socket */
-       err = write_buf(gh->gh_fd, buf, gr->gr_size);
-       if (err) {
-               warnx("write_buf");
-               goto out;
-       }
-       gr->gr_done += gr->gr_size;
-
-       *gh->gh_outstanding_requests_last = gr;
-       gh->gh_outstanding_requests_last = &gr->gr_next;
-
-       DPRINTF(("write done\n"));
-
-       return 0;
-
- out:
-       free(gr);
-       return err;
-}
-
-int
-gnbd_reply(struct gnbd_handle *gh)
-{
-       struct gnbd_server_reply gsr;
-       struct gnbd_request *gr;
-       int err;
-
-       DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags));
-       if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY))
-               return kill_gserv_reply(gh);
-       if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY))
-               return login_reply(gh);
-       if ((gh->gh_flags & GHF_INCOMING_REQUEST))
-               return incoming_request(gh);
-
-       DPRINTF(("read response\n"));
-       err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL);
-       if (err) {
-               warnx("read gnbd_reply failed");
-               return err;
-       }
-
-       if (ntohl(gsr.error)) {
-               warnx("gnbd server reply error: %s", strerror(gsr.error));
-               return gsr.error;
-       }
-
-       switch (ntohl(gsr.magic)) {
-       case GNBD_KEEP_ALIVE_MAGIC:
-               DPRINTF(("read keep alive magic\n"));
-               return GNBD_CONTINUE;
-       case GNBD_REPLY_MAGIC:
-               DPRINTF(("read reply magic\n"));
-               memcpy(&gr, gsr.handle, sizeof(gr));
-               err = find_request(gh, gr);
-               if (err) {
-                       warnx("unknown request");
-                       return err;
-               }
-               if (gr->gr_done != gr->gr_size) {
-                       gh->gh_incoming_request = gr;
-                       gh->gh_flags |= GHF_INCOMING_REQUEST;
-                       return GNBD_CONTINUE;
-               } else {
-                       gh->gh_finished_request = gr->gr_cookie;
-                       free(gr);
-                       return GNBD_REQUEST_DONE;
-               }
-       default:
-               break;
-       }
-
-       return GNBD_CONTINUE;
-}
-
-uint64_t
-gnbd_sectors(struct gnbd_handle *gh)
-{
-
-       return gh->gh_sectors;
-}
-
-struct gnbd_handle *
-gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
-{
-       struct gnbd_handle *gh;
-       struct addrinfo *res, *ai;
-       int err;
-
-       gh = malloc(sizeof(struct gnbd_handle));
-       if (gh == NULL)
-               return NULL;
-       memset(gh, 0, sizeof(gh));
-       gh->gh_fd = -1;
-       gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;
-
-       strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname));
-       strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename));
-
-       err = getaddrinfo(server, NULL, NULL, &res);
-       if (err) {
-               if (err != EAI_SYSTEM)
-                       warnx("getaddrinfo: %s", gai_strerror(err));
-               else
-                       warn("getaddrinfo: %s", gai_strerror(err));
-               goto out;
-       }
-
-       for (ai = res; ai; ai = ai->ai_next) {
-               if (ai->ai_socktype != SOCK_STREAM)
-                       continue;
-               if (ai->ai_family == AF_INET)
-                       break;
-       }
-
-       if (ai == NULL)
-               goto out;
-
-       gh->gh_sin.sin_family = ai->ai_family;
-       gh->gh_sin.sin_port = htons(port);
-       memcpy(&gh->gh_sin.sin_addr,
-           &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
-           sizeof(gh->gh_sin.sin_addr));
-
-       err = kill_gserv(gh);
-       if (err) {
-               warnx("gnbd_kill_gserv");
-               goto out;
-       }
-
-       freeaddrinfo(res);
-       return gh;
- out:
-       free(gh);
-       freeaddrinfo(res);
-       return NULL;
-}
diff --git a/tools/blktap/libgnbd/libgnbd.h b/tools/blktap/libgnbd/libgnbd.h
deleted file mode 100644 (file)
index 9fb3dbb..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* libgnbd.h
- *
- * gnbd client library
- *
- * Copyright (c) 2005, Christian Limpach
- */
-     
-#define GNBD_LOGIN_DONE                0x10001
-#define GNBD_REQUEST_DONE      0x10002
-#define GNBD_CONTINUE          0x10003
-#define GNBD_CONTINUE_WRITE    0x10004
-
-struct gnbd_handle;
-int gnbd_close(struct gnbd_handle *);
-int gnbd_fd(struct gnbd_handle *);
-unsigned long gnbd_finished_request(struct gnbd_handle *);
-int gnbd_kill_gserv(struct gnbd_handle *);
-int gnbd_login(struct gnbd_handle *);
-int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
-    unsigned long);
-int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
-    unsigned long);
-int gnbd_reply(struct gnbd_handle *);
-uint64_t gnbd_sectors(struct gnbd_handle *);
-struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);
index 25b80dea166bdf12c2c92da17304b8aa9bec7f0e..47e469b111a40923d257961d2450587c1cf942bf 100644 (file)
@@ -145,33 +145,33 @@ void blkif_destroy(blkif_be_destroy_t *destroy)
     destroy->status = BLKIF_BE_STATUS_OKAY;
 }
 
-void vbd_grow(blkif_be_vbd_grow_t *grow) 
+void vbd_create(blkif_be_vbd_create_t *create)
 {
     blkif_t            *blkif;
     vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = grow->vdevice;
+    blkif_vdev_t        vdevice = create->vdevice;
 
-    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
+    DPRINTF("parallax (vbd_create): create=%p\n", create); 
     
-    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
     if ( blkif == NULL )
     {
-        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
-                grow->domid, grow->blkif_handle); 
-        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
+                create->domid, create->blkif_handle); 
+        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
         return;
     }
 
     /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
-            grow->extent.sector_start);
+    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
+            (unsigned long)create->dev_handle);
 
-    vdi = vdi_get(grow->extent.sector_start);
+    vdi = vdi_get(create->dev_handle);
     if (vdi == NULL)
     {
-        printf("parallax (vbd_grow): VDI %llx not found.\n",
-               grow->extent.sector_start);
-        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+        printf("parallax (vbd_create): VDI %lx not found.\n",
+               (unsigned long)create->dev_handle);
+        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
         return;
     }
     
@@ -183,7 +183,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
     *vdip = vdi;
     
     DPRINTF("vbd_grow: happy return!\n"); 
-    grow->status = BLKIF_BE_STATUS_OKAY;
+    create->status = BLKIF_BE_STATUS_OKAY;
 }
 
 int parallax_control(control_msg_t *msg)
@@ -213,10 +213,10 @@ int parallax_control(control_msg_t *msg)
         blkif_destroy((blkif_be_destroy_t *)msg->msg);
         break;  
         
-    case CMSG_BLKIF_BE_VBD_GROW:
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+    case CMSG_BLKIF_BE_VBD_CREATE:
+        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
             goto parse_error;
-        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
+        vbd_create((blkif_be_vbd_create_t *)msg->msg);
         break;
     }
     return 0;
index 9c853bc0354ef8012e1153d302e4cbe5938bf8dc..ee47957ada3312f24ebe0d7ff42a436cfdc2a975 100644 (file)
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <pthread.h>
 #include "blktaplib.h"
 #include "blockstore.h"
 #include "vdi.h"
+#include "block-async.h"
+#include "requests-async.h"
 
 #define PARALLAX_DEV     61440
+#define SECTS_PER_NODE   8
+
 
 #if 0
 #define DPRINTF(_f, _a...) printf ( _f , ## _a )
@@ -142,33 +147,33 @@ void blkif_destroy(blkif_be_destroy_t *destroy)
     destroy->status = BLKIF_BE_STATUS_OKAY;
 }
 
-void vbd_grow(blkif_be_vbd_grow_t *grow) 
+void vbd_create(blkif_be_vbd_create_t *create)
 {
     blkif_t            *blkif;
     vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = grow->vdevice;
+    blkif_vdev_t        vdevice = create->vdevice;
 
-    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
+    DPRINTF("parallax (vbd_create): create=%p\n", create); 
     
-    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
     if ( blkif == NULL )
     {
-        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
-                grow->domid, grow->blkif_handle); 
-        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
+                create->domid, create->blkif_handle); 
+        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
         return;
     }
 
     /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
-            grow->extent.sector_start);
+    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
+            (unsigned long)create->dev_handle);
 
-    vdi = vdi_get(grow->extent.sector_start);
+    vdi = vdi_get(create->dev_handle);
     if (vdi == NULL)
     {
-        printf("parallax (vbd_grow): VDI %llx not found.\n",
-               grow->extent.sector_start);
-        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+        printf("parallax (vbd_create): VDI %lx not found.\n",
+               (unsigned long)create->dev_handle);
+        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
         return;
     }
     
@@ -180,7 +185,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
     *vdip = vdi;
     
     DPRINTF("vbd_grow: happy return!\n"); 
-    grow->status = BLKIF_BE_STATUS_OKAY;
+    create->status = BLKIF_BE_STATUS_OKAY;
 }
 
 int parallax_control(control_msg_t *msg)
@@ -210,10 +215,10 @@ int parallax_control(control_msg_t *msg)
         blkif_destroy((blkif_be_destroy_t *)msg->msg);
         break;  
         
-    case CMSG_BLKIF_BE_VBD_GROW:
-        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+    case CMSG_BLKIF_BE_VBD_CREATE:
+        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
             goto parse_error;
-        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
+        vbd_create((blkif_be_vbd_create_t *)msg->msg);
         break;
     }
     return 0;
@@ -248,9 +253,9 @@ int parallax_probe(blkif_request_t *req, blkif_t *blkif)
             img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
             img_info[nr_vdis].device   = vdi->vdevice;
             img_info[nr_vdis].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
-            /* The -2 here accounts for the LSB in the radix tree */
+            /* The -1 here accounts for the LSB in the radix tree */
             img_info[nr_vdis].capacity = 
-                    ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
+                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
             nr_vdis++;
             vdi = vdi->next;
         }
@@ -274,78 +279,122 @@ err:
     return BLKTAP_RESPOND;  
 }
 
+typedef struct {
+    blkif_request_t *req;
+    int              count;
+    int              error;
+    pthread_mutex_t  mutex;
+} pending_t;
+
+#define MAX_REQUESTS 64
+pending_t pending_list[MAX_REQUESTS];
+
+struct cb_param {
+       pending_t *pent;
+       int       segment;
+       u64       sector; 
+       u64       vblock; /* for debug printing -- can be removed. */
+};
+
+static void read_cb(struct io_ret r, void *in_param)
+{
+       struct cb_param *param = (struct cb_param *)in_param;
+       pending_t *p = param->pent;
+       int segment = param->segment;
+       blkif_request_t *req = p->req;
+    unsigned long size, offset, start;
+       char *dpage, *spage;
+       
+       spage  = IO_BLOCK(r);
+       if (spage == NULL) { p->error++; goto finish; }
+       dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
+    
+    /* Calculate read size and offset within the read block. */
+
+    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
+    size = ( blkif_last_sect (req->frame_and_sects[segment]) -
+             blkif_first_sect(req->frame_and_sects[segment]) + 1
+           ) << SECTOR_SHIFT;
+    start = blkif_first_sect(req->frame_and_sects[segment]) 
+            << SECTOR_SHIFT;
+
+    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
+            "vblock %llx, "
+            "size %lx\n", 
+            param->sector, blkif_first_sect(p->req->frame_and_sects[segment]),
+            blkif_last_sect (p->req->frame_and_sects[segment]),
+            param->vblock, size); 
+
+    memcpy(dpage + start, spage + offset, size);
+    freeblock(spage);
+    
+    /* Done the read.  Now update the pending record. */
+ finish:
+    pthread_mutex_lock(&p->mutex);
+    p->count--;
+    
+       if (p->count == 0) {
+       blkif_response_t *rsp;
+       
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_READ;
+       if (p->error == 0) {
+               rsp->status = BLKIF_RSP_OKAY;
+       } else {
+               rsp->status = BLKIF_RSP_ERROR;
+       }
+        blktap_inject_response(rsp);       
+    }
+    
+    pthread_mutex_unlock(&p->mutex);
+       
+       free(param); /* TODO: replace with cached alloc/dealloc */
+}      
+
 int parallax_read(blkif_request_t *req, blkif_t *blkif)
 {
     blkif_response_t *rsp;
-    unsigned long size, offset, start;
-    u64 sector;
     u64 vblock, gblock;
     vdi_t *vdi;
+    u64 sector;
     int i;
     char *dpage, *spage;
+    pending_t *pent;
 
     vdi = blkif_get_vdi(blkif, req->device);
     
     if ( vdi == NULL )
         goto err;
+        
+    pent = &pending_list[ID_TO_IDX(req->id)];
+    pent->count = req->nr_segments;
+    pent->req = req;
+    pthread_mutex_init(&pent->mutex, NULL);
     
     for (i = 0; i < req->nr_segments; i++) {
-            
-        dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-        
-        /* Round the requested segment to a block address. */
-        
-        sector  = req->sector_number + (8*i);
-        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-        
-        /* Get that block from the store. */
-        
-        gblock = vdi_lookup_block(vdi, vblock, NULL);
-        
-        /* Calculate read size and offset within the read block. */
-        
-        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
-        size = ( blkif_last_sect (req->frame_and_sects[i]) -
-                 blkif_first_sect(req->frame_and_sects[i]) + 1
-               ) << SECTOR_SHIFT;
-        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-        
-        /* If the block does not exist in the store, return zeros. */
-        /* Otherwise, copy that region to the guest page.          */
-        
-        DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
-                "vblock %llx, gblock %llx, "
-                "size %lx\n", 
-                sector, blkif_first_sect(req->frame_and_sects[i]),
-                blkif_last_sect (req->frame_and_sects[i]),
-                vblock, gblock, size); 
-       
-        if ( gblock == 0 ) {
-           
-            memset(dpage + start, '\0', size);
-            
-        } else {
-            
-            spage = readblock(gblock);
-            
-            if (spage == NULL) {
-                printf("Error reading gblock from store: %Ld\n", gblock);
-                goto err;
-            }
-            
-            memcpy(dpage + start, spage + offset, size);
-            
-            freeblock(spage);
-        }
-        
-    }
+        pthread_t tid;
+        int ret;
+        struct cb_param *p;
+
+           /* Round the requested segment to a block address. */
+           sector  = req->sector_number + (8*i);
+           vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+               /* TODO: Replace this call to malloc with a cached allocation */
+               p = (struct cb_param *)malloc(sizeof(struct cb_param));
+               p->pent = pent;
+               p->sector = sector; 
+               p->segment = i;     
+               p->vblock = vblock; /* dbg */
+               
+           /* Get that block from the store. */
+           async_read(vdi, vblock, read_cb, (void *)p);
 
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_READ;
-    rsp->status = BLKIF_RSP_OKAY;
+    }
+    
+    return BLKTAP_STOLEN;
 
-    return BLKTAP_RESPOND;
 err:
     rsp = (blkif_response_t *)req;
     rsp->id = req->id;
@@ -355,6 +404,37 @@ err:
     return BLKTAP_RESPOND;  
 }
 
+static void write_cb(struct io_ret r, void *in_param)
+{
+       struct cb_param *param = (struct cb_param *)in_param;
+       pending_t *p = param->pent;
+       blkif_request_t *req = p->req;
+
+       /* catch errors from the block code. */
+       if (IO_INT(r) < 0) p->error++;
+       
+    pthread_mutex_lock(&p->mutex);
+    p->count--;
+    
+       if (p->count == 0) {
+       blkif_response_t *rsp;
+       
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_WRITE;
+       if (p->error == 0) {
+               rsp->status = BLKIF_RSP_OKAY;
+       } else {
+               rsp->status = BLKIF_RSP_ERROR;
+       }
+        blktap_inject_response(rsp);       
+    }
+    
+    pthread_mutex_unlock(&p->mutex);
+       
+       free(param); /* TODO: replace with cached alloc/dealloc */
+}
+
 int parallax_write(blkif_request_t *req, blkif_t *blkif)
 {
     blkif_response_t *rsp;
@@ -364,13 +444,20 @@ int parallax_write(blkif_request_t *req, blkif_t *blkif)
     char *spage;
     unsigned long size, offset, start;
     vdi_t *vdi;
+    pending_t *pent;
 
     vdi = blkif_get_vdi(blkif, req->device);
     
     if ( vdi == NULL )
         goto err;
+        
+    pent = &pending_list[ID_TO_IDX(req->id)];
+    pent->count = req->nr_segments;
+    pent->req = req;
+    pthread_mutex_init(&pent->mutex, NULL);
     
     for (i = 0; i < req->nr_segments; i++) {
+        struct cb_param *p;
             
         spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
         
@@ -379,10 +466,6 @@ int parallax_write(blkif_request_t *req, blkif_t *blkif)
         sector  = req->sector_number + (8*i);
         vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
         
-        /* Get that block from the store. */
-        
-        gblock   = vdi_lookup_block(vdi, vblock, &writable);
-        
         /* Calculate read size and offset within the read block. */
         
         offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
@@ -405,27 +488,20 @@ int parallax_write(blkif_request_t *req, blkif_t *blkif)
             printf("]\n] STRANGE WRITE!\n]\n");
             goto err;
         }
-
-        if (( gblock == 0 ) || ( writable == 0 )) {
-         
-            gblock = allocblock(spage);
-            vdi_update_block(vdi, vblock, gblock);
-            
-        } else {
-            
-            /* write-in-place, no need to change mappings. */
-            writeblock(gblock, spage);
-            
-        }
-
+        
+               /* TODO: Replace this call to malloc with a cached allocation */
+               p = (struct cb_param *)malloc(sizeof(struct cb_param));
+               p->pent = pent;
+               p->sector = sector; 
+               p->segment = i;     
+               p->vblock = vblock; /* dbg */
+               
+        /* Issue the write to the store. */
+           async_write(vdi, vblock, spage, write_cb, (void *)p);
     }
 
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_WRITE;
-    rsp->status = BLKIF_RSP_OKAY;
+    return BLKTAP_STOLEN;
 
-    return BLKTAP_RESPOND;
 err:
     rsp = (blkif_response_t *)req;
     rsp->id = req->id;
@@ -477,16 +553,19 @@ void __init_parallax(void)
 }
 
 
+
 int main(int argc, char *argv[])
 {
     DPRINTF("parallax: starting.\n"); 
     __init_blockstore();
     DPRINTF("parallax: initialized blockstore...\n"); 
+       init_block_async();
+    DPRINTF("parallax: initialized async blocks...\n"); 
     __init_vdi();
     DPRINTF("parallax: initialized vdi registry etc...\n"); 
     __init_parallax();
     DPRINTF("parallax: initialized local stuff..\n"); 
-    
+
     blktap_register_ctrl_hook("parallax_control", parallax_control);
     blktap_register_request_hook("parallax_request", parallax_request);
     DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
index 44aa0ac482aed1940f347c76c11ec0b9d9017a6d..579df2e6554206f8522af7b0fdfc27ed91ec2a04 100644 (file)
 #define DEBUG
 */
 
-/*
-#define STAGED
-*/
-
-#define ZERO 0LL
-#define ONE 1LL
-#define ONEMASK 0xffffffffffffffeLL
-
-
-typedef u64 *radix_tree_node;
-
-
 /* Experimental radix cache. */
 
 static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -276,7 +264,6 @@ radix_tree_node cloneblock(radix_tree_node block) {
  *
  *   @return: value on success, zero on error
  */
-#ifndef STAGED
 
 u64 lookup(int height, u64 root, u64 key) {
     radix_tree_node node;
@@ -318,92 +305,6 @@ u64 lookup(int height, u64 root, u64 key) {
     return ZERO;
 }
 
-#else /* STAGED */
-
-
-/* non-recursive staged lookup, assume height is 35. */
-u64 lookup(int height, u64 root, u64 key) {
-    radix_tree_node node;
-    u64 mask = ONE;
-
-printf("lookup!\n");    
-    assert(key >> 35 == 0);
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = 27;
-
-    /* now carve off equal sized chunks at each step */
-    
-    /* ROOT: (LEVEL 0) KEYLEN=35*/
-    if (getid(root) == ZERO)
-        return ZERO;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        return ZERO;
-
-    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
-    mask &= root;
-    freeblock(node);
-
-    if (height == 0)
-        return ( root & ONEMASK ) | mask;
-
-    height -= RADIX_TREE_MAP_SHIFT; /* == 18 */
-
-    /* LEVEL 1: KEYLEN=26*/
-    if (getid(root) == ZERO)
-        return ZERO;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        return ZERO;
-
-    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
-    mask &= root;
-    freeblock(node);
-
-    if (height == 0)
-        return ( root & ONEMASK ) | mask;
-
-    height -= RADIX_TREE_MAP_SHIFT; /* == 9 */
-    
-    /* LEVEL 2: KEYLEN=17*/
-    if (getid(root) == ZERO)
-        return ZERO;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        return ZERO;
-
-    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
-    mask &= root;
-    freeblock(node);
-
-    if (height == 0)
-        return ( root & ONEMASK ) | mask;
-
-    height -= RADIX_TREE_MAP_SHIFT; /* == 0 */
-    
-    /* LEVEL 3: KEYLEN=8*/
-    if (getid(root) == ZERO)
-        return ZERO;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        return ZERO;
-
-    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
-    mask &= root;
-    freeblock(node);
-
-    // if (height == 0)
-        return ( root & ONEMASK ) | mask;
-
-}
-
-#endif
-
 /*
  * update: set a radix tree entry, doing copy-on-write as necessary
  *   @height: height in bits of the radix tree
@@ -414,9 +315,6 @@ printf("lookup!\n");
  *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
  */
 
-#ifndef STAGED
-
-
 u64 update(int height, u64 root, u64 key, u64 val) {
     int offset;
     u64 child;
@@ -487,320 +385,6 @@ u64 update(int height, u64 root, u64 key, u64 val) {
     return root;
 }
 
-
-#else /* STAGED */
-
-/* When update is called, state->next points to the thing to call after 
- * update is finished. */
-
-struct cb_state_st;
-
-typedef struct {
-    /* public stuff */
-    u64 val;
-    u64 key;
-    u64 result;
-    
-    /* internal state */
-    u64 root[4];
-    radix_tree_node node[4];
-    void (*next)(struct cb_state_st *);
-    int err;
-} radix_update_t;
-
-typedef struct cb_state_st{
-    void (*next)(struct cb_state_st *); /* Next continuation. */
-    union {
-        radix_update_t update;
-    } radix;
-} cb_state_t;
-
-void s_readblock(cb_state_t *state, u64 id, void **ret)
-{
-    *ret = readblock(id);
-    state->next(state);
-}
-
-void s_allocblock(cb_state_t *state, void *block, u64 *ret)
-{
-    *ret = allocblock(block);
-    state->next(state);
-}
-        
-void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret)
-{
-    *ret = writeblock(id, block);
-    state->next(state);
-}
-
-void cb_done(cb_state_t *state)
-{
-    printf("*** done ***\n");
-}
-
-/* forward prototypes. */
-void up0(cb_state_t *state);
-void up1(cb_state_t *state);
-void up2(cb_state_t *state);
-void up3(cb_state_t *state);
-void up4(cb_state_t *state);
-void up5(cb_state_t *state);
-void up6(cb_state_t *state);
-void up7(cb_state_t *state);
-void up8(cb_state_t *state);
-void up9(cb_state_t *state);
-void up10(cb_state_t *state);
-void up11(cb_state_t *state);
-void up12(cb_state_t *state);
-
-u64 update(int height, u64 root, u64 key, u64 val)
-{
-    cb_state_t state;
-    radix_update_t *u = &state.radix.update;
-    
-    u->val = val;
-    u->key = key;
-    u->root[0] = root;
-    u->root[1] = u->root[2] = u->root[3] = ZERO;
-    u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL;
-    
-    /* take a copy of the higher-scoped next continuation. */
-    u->next = state->next;
-    
-    /* update start state */
-    state->next = up0;
-    
-    for (;;)
-    {
-        state->next(state);
-        if (state->next == NULL) 
-            break;
-    }
-    
-    return u->result;
-}
-
-/* c0:*/
-void up0(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    state->next = up1;
-    s_readblock(state, getid(u->root[0]), (void **)&(u->node[0]));
-}
-    
-/* c1: */
-void up1(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK];
-    if (u->root[1] == ZERO) {
-        u->node[1] = (radix_tree_node) newblock();
-        /* goto next continuation (c2)*/ up2(state);return;
-    } else {
-        state->next = up2;
-        s_readblock(state, getid(u->root[1]), (void **)&(u->node[1]));
-    }
-}
-
-/* c2: */
-void up2(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) {
-        /* need to clone this node */
-        radix_tree_node oldnode = u->node[1];
-        u->node[1] = cloneblock(u->node[1]);
-        freeblock(oldnode);
-        u->root[1] = ZERO;
-    }
-    u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK];
-    if (u->root[2] == ZERO) {
-        u->node[2] = (radix_tree_node) newblock();
-        /* goto next continuation (c3)*/ up3(state);return;
-    } else {
-        state->next = up3;
-        s_readblock(state, getid(u->root[2]), (void **)&(u->node[2]));
-    }
-}
-    
-/* c3: */
-void up3(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) {
-        /* need to clone this node */
-        radix_tree_node oldnode = u->node[2];
-        u->node[2] = cloneblock(u->node[2]);
-        freeblock(oldnode);
-        u->root[2] = ZERO;
-    }
-    u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK];
-    if (u->root[3] == ZERO) {
-        u->node[3] = (radix_tree_node) newblock();
-        /* goto next continuation (c4)*/ up4(state);return;
-    } else {
-        state->next = up4;
-        s_readblock(state, getid(u->root[3]), (void **)&(u->node[3]));
-    }
-}
-    
-/* c4: */
-void up4(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) {
-        /* need to clone this node */
-        radix_tree_node oldnode = u->node[3];
-        u->node[3] = cloneblock(u->node[3]);
-        freeblock(oldnode);
-        u->root[3] = ZERO;
-    }
-    
-    if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){
-        /* no change, so we already owned the child */
-        /* goto last continuation (c12) */ up12(state);return;
-    }
-
-    u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val;
-
-    /* new/cloned blocks need to be saved */
-    if (u->root[3] == ZERO) {
-        /* mark this as an owned block */
-        state->next = up5;
-        s_allocblock(state, u->node[3], &u->root[3]);
-        /* goto continuation (c5) */ return;
-    } else {
-        state->next = up6;
-        s_writeblock(state, getid(u->root[3]), u->node[3], &u->err);
-        /* goto continuation (c6) */ return;
-    }
-}
-
-/* c5: */
-void up5(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->root[3])
-        u->root[3] = writable(u->root[3]);
-    /* goto continuation (c6) */ up6(state);return;
-}
-    
-/* c6: */
-void up6(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){
-        /* no change, so we already owned the child */
-        /* goto last continuation (c12) */ up12(state);return;
-    }
-    
-    u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3];
-
-    /* new/cloned blocks need to be saved */
-    if (u->root[2] == ZERO) {
-        /* mark this as an owned block */
-        state->next = up7;
-        s_allocblock(state, u->node[2], &u->root[2]);
-        /* goto continuation (c7) */return;
-    } else {
-        state->next = up8;
-        s_writeblock(state, getid(u->root[2]), u->node[2], &u->err);
-        /* goto continuation (c8) */return;
-    }
-}
-
-/* c7: */
-void up7(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->root[2])
-        u->root[2] = writable(u->root[2]);
-    /* goto continuation (c8) */ up8(state);return;
-}
-    
-/* c8: */
-void up8(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){
-        /* no change, so we already owned the child */
-        /* goto last continuation (c12) */ up12(state);return;
-    }
-    
-    u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2];
-
-    /* new/cloned blocks need to be saved */
-    if (u->root[1] == ZERO) {
-        /* mark this as an owned block */
-        state->next = up9;
-        s_allocblock(state, u->node[1], &u->root[1]);
-        /* goto continuation (c9) */return;
-    } else {
-        state->next = up10;
-        s_writeblock(state, getid(u->root[1]), u->node[1], &u->err);
-        /* goto continuation (c10) */return;
-    }
-}
-
-/* c9: */
-void up9(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->root[1])
-        u->root[1] = writable(u->root[1]);
-    /* goto continuation (c10) */ up10(state);return;
-}
-    
-/* c10: */
-void up10(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){
-        /* no change, so we already owned the child */
-        /* goto last continuation (c12) */ up12(state);return;
-    }
-    
-    u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1];
-
-    /* new/cloned blocks need to be saved */
-    if (u->root[0] == ZERO) {
-        /* mark this as an owned block */
-        state->next = up11;
-        s_allocblock(state, u->node[0], &u->root[0]);
-        /* goto continuation (c11) */ return;
-    } else {
-        state->next = up10;
-        s_writeblock(state, getid(u->root[0]), u->node[0], &u->err);
-        /* goto continuation (c12) */ return;
-    }
-}
-
-/* c11: */
-void up11(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    if (u->root[0])
-        u->root[0] = writable(u->root[0]);
-    /* goto continuation (c12) */ up12(state);return;
-}
-    
-/* c12: */
-void up12(cb_state_t *state) {
-    radix_update_t *u = &state->radix.update;
-    
-    int i;
-    for (i=0;i<4;i++)
-        if(u->node[i] != NULL) freeblock(u->node[i]);
-    
-    u->result = u->root[0];
-    state->next = u->next;
-    
-    state->next(state);return;
-}
-    
-#endif
-
-
 /**
  * snapshot: create a snapshot
  *   @root: old root node
@@ -840,7 +424,6 @@ int collapse(int height, u64 proot, u64 croot)
     int i, numlinks, ret, total = 0;
     radix_tree_node pnode, cnode;
     
-//printf("proot: %Ld\n", getid(proot));
     if (height == 0) {
         height = -1; /* terminate recursion */
     } else {        
index e7de3f453e32a4882ea4dd6d95fa31f2e30125ad..61ea2205f85bf7548d1f13d319e8fb1ce69fb182 100644 (file)
 #define putid(x) ((x)<<1)
 #define writable(x) (((x)<<1)|1LL)
 #define iswritable(x) ((x)&1LL)
+#define ZERO 0LL
+#define ONE 1LL
+#define ONEMASK 0xffffffffffffffeLL
+
+#define RADIX_TREE_MAP_SHIFT 9
+#define RADIX_TREE_MAP_MASK 0x1ff
+#define RADIX_TREE_MAP_ENTRIES 512
+
+typedef u64 *radix_tree_node;
+
 
 /*
  * main api
diff --git a/tools/blktap/requests-async.c b/tools/blktap/requests-async.c
new file mode 100755 (executable)
index 0000000..bb2d07b
--- /dev/null
@@ -0,0 +1,629 @@
+/* read.c\r
+ *\r
+ * asynchronous read experiment for parallax.\r
+ */\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <assert.h>\r
+#include <pthread.h>\r
+#include "requests-async.h"\r
+#include "vdi.h"\r
+#include "radix.h"\r
+\r
+#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)\r
+#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)\r
+#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))\r
+\r
+\r
+\r
+//#define STANDALONE\r
+\r
+#if 0\r
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )\r
+#else\r
+#define DPRINTF(_f, _a...) ((void)0)\r
+#endif\r
+\r
+\r
+struct io_req {\r
+    enum { IO_OP_READ, IO_OP_WRITE } op;\r
+    u64        root;\r
+    u64        vaddr;\r
+    int        state;\r
+    io_cb_t    cb;\r
+    void      *param;\r
+    struct radix_lock *lock;\r
+\r
+    /* internal stuff: */\r
+    struct io_ret    retval;/* holds the return while we unlock. */\r
+    char            *block; /* the block to write */\r
+    radix_tree_node  radix[3];\r
+    u64              radix_addr[3];\r
+};\r
+\r
+void clear_w_bits(radix_tree_node node) \r
+{\r
+       int i;\r
+       for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++)\r
+               node[i] = node[i] & ONEMASK;\r
+       return;\r
+}\r
+\r
+enum states {\r
+    /* both */\r
+    READ_L1,\r
+    READ_L2,\r
+    READ_L3,\r
+\r
+    /* read */\r
+    READ_LOCKED,\r
+    READ_DATA,\r
+    READ_UNLOCKED,\r
+    RETURN_ZERO,\r
+\r
+    /* write */\r
+    WRITE_LOCKED,\r
+    WRITE_DATA,\r
+    WRITE_UNLOCKED,\r
+    \r
+    /* L3 Zero Path */\r
+    ALLOC_DATA_L3z,\r
+    WRITE_L3_L3z,\r
+    \r
+    /* L3 Fault Path */\r
+    ALLOC_DATA_L3f,\r
+    WRITE_L3_L3f,\r
+    \r
+    /* L2 Zero Path */\r
+    ALLOC_DATA_L2z,\r
+    WRITE_L2_L2z,\r
+    ALLOC_L3_L2z,\r
+    WRITE_L2_L3z,\r
+    \r
+    /* L2 Fault Path */\r
+    READ_L3_L2f,\r
+    ALLOC_DATA_L2f,\r
+    WRITE_L2_L2f,\r
+    ALLOC_L3_L2f,\r
+    WRITE_L2_L3f,\r
+\r
+       /* L1 Zero Path */\r
+    ALLOC_DATA_L1z,\r
+    ALLOC_L3_L1z,\r
+    ALLOC_L2_L1z,\r
+    WRITE_L1_L1z,\r
+\r
+       /* L1 Fault Path */\r
+       READ_L2_L1f,\r
+       READ_L3_L1f,\r
+    ALLOC_DATA_L1f,\r
+    ALLOC_L3_L1f,\r
+    ALLOC_L2_L1f,\r
+    WRITE_L1_L1f,\r
+    \r
+};\r
+\r
+enum radix_offsets {\r
+    L1 = 0, \r
+    L2 = 1,\r
+    L3 = 2\r
+};\r
+\r
+\r
+static void read_cb(struct io_ret ret, void *param);\r
+static void write_cb(struct io_ret ret, void *param);\r
+\r
+\r
+int async_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param)\r
+{\r
+    struct io_req *req;\r
+\r
+    DPRINTF("async_read\n");\r
+\r
+    req = (struct io_req *)malloc(sizeof (struct io_req));\r
+       req->radix[0] = req->radix[1] = req->radix[2] = NULL;\r
+\r
+       if (req == NULL) {perror("req was NULL in async_read"); return(-1); }\r
+       \r
+    req->op    = IO_OP_READ;\r
+    req->root  = vdi->radix_root;\r
+    req->lock  = vdi->radix_lock; \r
+    req->vaddr = vaddr;\r
+    req->cb    = cb;\r
+    req->param = param;\r
+    req->state = READ_LOCKED;\r
+\r
+       block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);\r
+       \r
+    return 0;\r
+}\r
+\r
+\r
+int   async_write(vdi_t *vdi, u64 vaddr, char *block, \r
+                  io_cb_t cb, void *param)\r
+{\r
+    struct io_req *req;\r
+\r
+\r
+    req = (struct io_req *)malloc(sizeof (struct io_req));\r
+       req->radix[0] = req->radix[1] = req->radix[2] = NULL;\r
+    //DPRINTF("async_write\n");\r
+    \r
+       if (req == NULL) {perror("req was NULL in async_write"); return(-1); }\r
+\r
+    req->op    = IO_OP_WRITE;\r
+    req->root  = vdi->radix_root;\r
+    req->lock  = vdi->radix_lock; \r
+    req->vaddr = vaddr;\r
+    req->block = block;\r
+    req->cb    = cb;\r
+    req->param = param;\r
+    req->radix_addr[L1] = getid(req->root); /* for consistency */\r
+    req->state = WRITE_LOCKED;\r
+\r
+       block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);\r
+\r
+\r
+       return 0;\r
+}\r
+\r
+void read_cb(struct io_ret ret, void *param)\r
+{\r
+    struct io_req *req = (struct io_req *)param;\r
+    radix_tree_node node;\r
+    u64 idx;\r
+    char *block;\r
+    void *req_param;\r
+\r
+    DPRINTF("read_cb\n");\r
+    /* get record */\r
+    switch(req->state) {\r
+       \r
+    case READ_LOCKED: \r
+    \r
+        DPRINTF("READ_LOCKED\n");\r
+       req->state = READ_L1;\r
+       block_read(getid(req->root), read_cb, req); \r
+       break;\r
+       \r
+    case READ_L1: /* block is the radix root */\r
+\r
+        DPRINTF("READ_L1\n");\r
+        block = IO_BLOCK(ret);\r
+        if (block == NULL) goto fail;\r
+        node = (radix_tree_node) block;\r
+        idx  = getid( node[L1_IDX(req->vaddr)] );\r
+        free(block);\r
+        if ( idx == ZERO ) {\r
+               req->state = RETURN_ZERO;\r
+               block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+        } else {\r
+               req->state = READ_L2;\r
+               block_read(idx, read_cb, req);\r
+        }\r
+        break;\r
+\r
+    case READ_L2:\r
+\r
+        DPRINTF("READ_L2\n");\r
+        block = IO_BLOCK(ret);\r
+        if (block == NULL) goto fail;\r
+        node = (radix_tree_node) block;\r
+        idx  = getid( node[L2_IDX(req->vaddr)] );\r
+        free(block);\r
+        if ( idx == ZERO ) {\r
+               req->state = RETURN_ZERO;\r
+               block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+        } else {\r
+               req->state = READ_L3;\r
+               block_read(idx, read_cb, req);\r
+        }\r
+        break;\r
+\r
+    case READ_L3:\r
+    \r
+        DPRINTF("READ_L3\n");\r
+        block = IO_BLOCK(ret);\r
+        if (block == NULL) goto fail;\r
+        node = (radix_tree_node) block;\r
+        idx  = getid( node[L3_IDX(req->vaddr)] );\r
+        free(block);\r
+        if ( idx == ZERO )  {\r
+               req->state = RETURN_ZERO;\r
+               block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+        } else {\r
+               req->state = READ_DATA;\r
+               block_read(idx, read_cb, req);\r
+        }\r
+        break;\r
+\r
+    case READ_DATA:\r
+    \r
+        DPRINTF("READ_DATA\n");\r
+        if (IO_BLOCK(ret) == NULL) goto fail;\r
+        req->retval = ret;\r
+        req->state = READ_UNLOCKED;\r
+        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+        break;\r
+        \r
+    case READ_UNLOCKED:\r
+       {\r
+               struct io_ret r;\r
+               io_cb_t cb;\r
+        DPRINTF("READ_UNLOCKED\n");\r
+        req_param = req->param;\r
+        r         = req->retval;\r
+        cb        = req->cb;\r
+        free(req);\r
+        cb(r, req_param);\r
+        break;\r
+    }\r
+    \r
+    case RETURN_ZERO:\r
+       {\r
+               struct io_ret r;\r
+               io_cb_t cb;\r
+           DPRINTF("RETURN_ZERO\n");\r
+           req_param = req->param;\r
+        cb        = req->cb;\r
+           free(req);\r
+        r.type = IO_BLOCK_T;\r
+        r.u.b = newblock();\r
+           cb(r, req_param);\r
+           break;\r
+       }\r
+        \r
+    default:\r
+       DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);\r
+       goto fail;\r
+    }\r
\r
+    return;\r
+\r
+ fail:\r
+       {\r
+               struct io_ret r;\r
+               io_cb_t cb;\r
+               DPRINTF("asyn_read had a read error.\n");\r
+        req_param = req->param;\r
+        r         = ret;\r
+        cb        = req->cb;\r
+        free(req);\r
+        cb(r, req_param);\r
+       }\r
+\r
+\r
+}\r
+\r
+void write_cb(struct io_ret r, void *param)\r
+{\r
+    struct io_req *req = (struct io_req *)param;\r
+    radix_tree_node node;\r
+    u64 a, addr;\r
+    void *req_param;\r
+\r
+    //DPRINTF("write_cb\n");\r
+    switch(req->state) {\r
+       \r
+    case WRITE_LOCKED:\r
+    \r
+        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));\r
+       req->state = READ_L1;\r
+       block_read(getid(req->root), write_cb, req); \r
+       break;\r
+       \r
+    case READ_L1: /* block is the radix root */\r
+\r
+        DPRINTF("READ_L1\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L1_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L2] = addr;\r
+        req->radix[L1] = node;\r
+\r
+        if ( addr == ZERO ) {\r
+               /* L1 empty subtree: */\r
+               req->state = ALLOC_DATA_L1z;\r
+               block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L1 fault: */\r
+            req->state = READ_L2_L1f;\r
+            block_read( addr, write_cb, req );\r
+        } else {\r
+            req->state = READ_L2;\r
+            block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+    \r
+    case READ_L2:\r
+\r
+        DPRINTF("READ_L2\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L3] = addr;\r
+        req->radix[L2] = node;\r
+\r
+        if ( addr == ZERO ) {\r
+               /* L2 empty subtree: */\r
+            req->state = ALLOC_DATA_L2z;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L2 fault: */\r
+            req->state = READ_L3_L2f;\r
+            block_read( addr, write_cb, req );\r
+        } else {\r
+            req->state = READ_L3;\r
+            block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+    \r
+    case READ_L3:\r
+\r
+        DPRINTF("READ_L3\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L3_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix[L3] = node;\r
+\r
+        if ( addr == ZERO ) {\r
+            /* L3 fault: */\r
+            req->state = ALLOC_DATA_L3z;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L3 fault: */\r
+            req->state = ALLOC_DATA_L3f;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else {\r
+            req->state = WRITE_DATA;\r
+            block_write( addr, req->block, write_cb, req );\r
+        }\r
+        break;\r
+    \r
+    /* L3 Zero Path: */\r
+\r
+    case ALLOC_DATA_L3z:\r
+\r
+        DPRINTF("ALLOC_DATA_L3z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L3_L3z;\r
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);\r
+        break;\r
+    \r
+    /* L3 Fault Path: */\r
+\r
+    case ALLOC_DATA_L3f:\r
+\r
+        DPRINTF("ALLOC_DATA_L3f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L3_L3f;\r
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);\r
+        break;\r
+\r
+    /* L2 Zero Path: */\r
+        \r
+    case ALLOC_DATA_L2z:\r
+\r
+        DPRINTF("ALLOC_DATA_L2z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3] = newblock();\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L2z;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L2z:\r
+\r
+        DPRINTF("ALLOC_L3_L2z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L2_L2z;\r
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);\r
+        break;\r
+        \r
+    /* L2 Fault Path: */\r
+        \r
+    case READ_L3_L2f:\r
+    \r
+       DPRINTF("READ_L3_L2f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        clear_w_bits(node);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix[L3] = node;\r
+               req->state = ALLOC_DATA_L2f;\r
+        block_alloc( req->block, write_cb, req );\r
+        break;\r
+                \r
+    case ALLOC_DATA_L2f:\r
+\r
+        DPRINTF("ALLOC_DATA_L2f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L2f;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L2f:\r
+\r
+        DPRINTF("ALLOC_L3_L2f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L2_L2f;\r
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);\r
+        break;\r
+        \r
+    /* L1 Zero Path: */\r
+    \r
+    case ALLOC_DATA_L1z:\r
+\r
+        DPRINTF("ALLOC_DATA_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3] = newblock();\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L1z;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L1z:\r
+\r
+        DPRINTF("ALLOC_L3_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L2] = newblock();\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L2_L1z;\r
+        block_alloc( (char*)req->radix[L2], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L2_L1z:\r
+\r
+        DPRINTF("ALLOC_L2_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L1][L1_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L1_L1z;\r
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);\r
+        break;\r
+\r
+    /* L1 Fault Path: */\r
+        \r
+    case READ_L2_L1f:\r
+    \r
+       DPRINTF("READ_L2_L1f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        clear_w_bits(node);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L3] = addr;\r
+        req->radix[L2] = node;\r
+        \r
+        if (addr == ZERO) {\r
+               /* nothing below L2, create an empty L3 and alloc data. */\r
+               /* (So skip READ_L3_L1f.) */\r
+               req->radix[L3] = newblock();\r
+               req->state = ALLOC_DATA_L1f;\r
+               block_alloc( req->block, write_cb, req );\r
+        } else {\r
+                       req->state = READ_L3_L1f;\r
+                       block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+        \r
+    case READ_L3_L1f:\r
+    \r
+       DPRINTF("READ_L3_L1f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        clear_w_bits(node);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix[L3] = node;\r
+               req->state = ALLOC_DATA_L1f;\r
+        block_alloc( req->block, write_cb, req );\r
+        break;\r
+                \r
+    case ALLOC_DATA_L1f:\r
+\r
+        DPRINTF("ALLOC_DATA_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L1f;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L1f:\r
+\r
+        DPRINTF("ALLOC_L3_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L2_L1f;\r
+        block_alloc( (char*)req->radix[L2], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L2_L1f:\r
+\r
+        DPRINTF("ALLOC_L2_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a = writable(addr);\r
+        req->radix[L1][L1_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L1_L1f;\r
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);\r
+        break;\r
+\r
+    case WRITE_DATA:\r
+    case WRITE_L3_L3z:\r
+    case WRITE_L3_L3f:\r
+    case WRITE_L2_L2z:\r
+    case WRITE_L2_L2f:\r
+    case WRITE_L1_L1z:\r
+    case WRITE_L1_L1f:\r
+    {\r
+       int i;\r
+        DPRINTF("DONE\n");\r
+        /* free any saved node vals. */\r
+        for (i=0; i<3; i++)\r
+               if (req->radix[i] != 0) free(req->radix[i]);\r
+        req->retval = r;\r
+        req->state = WRITE_UNLOCKED;\r
+        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);\r
+        break;\r
+    }\r
+    case WRITE_UNLOCKED:\r
+    {\r
+               struct io_ret r;\r
+               io_cb_t cb;\r
+        DPRINTF("WRITE_UNLOCKED!\n");\r
+        req_param = req->param;\r
+        r         = req->retval;\r
+        cb        = req->cb;\r
+           free(req);\r
+        cb(r, req_param);\r
+        break;\r
+    }\r
+        \r
+    default:\r
+       DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);\r
+       goto fail;\r
+    }\r
+    \r
+    return;\r
+    \r
+ fail:\r
+       {\r
+               struct io_ret r;\r
+               io_cb_t cb;\r
+               DPRINTF("asyn_write had a read error mid-way.\n");\r
+        req_param = req->param;\r
+        cb        = req->cb;\r
+        r.type = IO_INT_T;\r
+        r.u.i  = -1;\r
+        free(req);\r
+        cb(r, req_param);\r
+       }\r
+}\r
+\r
diff --git a/tools/blktap/requests-async.h b/tools/blktap/requests-async.h
new file mode 100755 (executable)
index 0000000..503a543
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _REQUESTSASYNC_H_\r
+#define _REQUESTSASYNC_H_\r
+\r
+#include "block-async.h"\r
+#include "blockstore.h" /* for newblock etc. */\r
+\r
+/*\r
+#define BLOCK_SIZE 4096\r
+#define ZERO 0ULL\r
+#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)\r
+#define iswritable(x) (((x) & 1LLU) != 0)\r
+#define writable(x) (((x) << 1) | 1LLU)\r
+#define readonly(x) ((u64)((x) << 1))\r
+*/\r
+\r
+int async_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param);\r
+int async_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param);\r
+             \r
+#endif //_REQUESTSASYNC_H_\r
index b3a84f024491e20d94c3facf53edad591262db9b..490a2e691ace5c4e57cadf7c2812d3906d9385c0 100644 (file)
 #include <fcntl.h>
 #include <string.h>
 #include <sys/time.h>
+#include <pthread.h>
 #include "blockstore.h"
+#include "block-async.h"
 #include "radix.h"
 #include "vdi.h"
                     
 #define VDI_REG_BLOCK   2LL
 #define VDI_RADIX_ROOT  writable(3)
                                                             
-#if 1
+#if 0
 #define DPRINTF(_f, _a...) printf ( _f , ## _a )
 #else
 #define DPRINTF(_f, _a...) ((void)0)
@@ -66,6 +68,7 @@ vdi_registry_t *get_vdi_registry(void)
     return vdi_reg;
 }
 
+
 vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
 {
     int ret;
@@ -106,12 +109,22 @@ vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
     vdi->id    = vdi_reg->nr_vdis++;
     strncpy(vdi->name, name, VDI_NAME_SZ);
     vdi->name[VDI_NAME_SZ] = '\0';
+    vdi->radix_lock = NULL; /* for tidiness */
     writeblock(vdi->block, (void *)vdi);
     
     update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
     writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
     freeblock(vdi_reg);
     
+    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+    if (vdi->radix_lock == NULL) 
+    {
+       perror("couldn't malloc radix_lock for new vdi!");
+       freeblock(vdi);
+       return NULL;
+    }
+    radix_lock_init(vdi->radix_lock);
+    
     return vdi;
 }
 
@@ -126,6 +139,16 @@ vdi_t *vdi_get(u64 vdi_id)
         return NULL;
     
     vdi = (vdi_t *)readblock(vdi_blk);
+    
+    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+    if (vdi->radix_lock == NULL) 
+    {
+       perror("couldn't malloc radix_lock for new vdi!");
+       freeblock(vdi);
+       return NULL;
+    }
+    radix_lock_init(vdi->radix_lock);
+    
     return vdi;
 }
 
index dd32102dadb366d7d9598400d71a70977fc918bd..1c04e79393cd2760ad2b43cf9b91763c917e034a 100644 (file)
@@ -1,3 +1,5 @@
+#ifndef _VDI_H_
+#define _VDI_H_
 /**************************************************************************
  * 
  * vdi.h
 #include "blktaplib.h"
 #include "snaplog.h"
 
-#define VDI_HEIGHT     35
-#define VDI_REG_HEIGHT 35 /* why not? */
+#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
+#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
 
 #define VDI_NAME_SZ 256
 
+
 typedef struct vdi {
     u64         id;               /* unique vdi id -- used by the registry   */
     u64         block;            /* block where this vdi lives (also unique)*/
@@ -24,6 +27,7 @@ typedef struct vdi {
     snap_id_t   snap;             /* next snapshot slot for this VDI         */
     struct vdi *next;             /* used to hash-chain in blkif.            */
     blkif_vdev_t vdevice;         /* currently mounted as...                 */
+    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */ 
     char        name[VDI_NAME_SZ];/* human readable vdi name                 */
 } vdi_t;
 
@@ -46,3 +50,5 @@ void vdi_snapshot(vdi_t *vdi);
 
 
 #endif /* __VDI_H__ */
+
+#endif //_VDI_H_